import data


In [1]:
import sqlite3
conn = sqlite3.connect('pmcv1-full.db')
c = conn.cursor()

available tables

  • refs: pmid integer, refpmid integer
  • pmcidmap: pmid integer, pmcid integer
  • meta: pmid integer, title text, journal_id text
  • authors: pmid integer, authnum integer, fn text, ln text
  • keywords: pmid integer, keyword text
  • abstracts: pmid integer, abstract text

SQL tables:

generate full graph and plot in and out degree


In [1]:
import graph_tool as gt
g = gt.Graph(directed = True) pmid_vertex_dict = dict() for pair in c.execute('''SELECT pmid, refpmid FROM refs'''): if pair[0] not in pmid_vertex_dict: v = g.add_vertex() pmid_vertex_dict[pair[0]] = int(v) if pair[1] not in pmid_vertex_dict: v = g.add_vertex() pmid_vertex_dict[pair[1]] = int(v) g.add_edge(pmid_vertex_dict[pair[0]], pmid_vertex_dict[pair[1]])

In [2]:
import cPickle as pickle
pickle.dump(g, open("full_graph.p", "wb")) pickle.dump(pmid_vertex_dict, open("full_graph_pmid_vertex_dict.p", "wb"))

In [4]:
g = pickle.load(open("full_graph.p", "rb"))
pmid_vertex_dict = pickle.load(open("full_graph_pmid_vertex_dict.p", "rb"))

In [5]:
#plots
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

In [6]:
#plot in degree
indegree = []
for vertex in g.vertices():
    indegree.append(vertex.in_degree())
plt.hist(indegree, np.arange(0,1000,20), log = True)


Out[6]:
(array([  7.55125200e+06,   1.43924000e+05,   3.12780000e+04,
          1.15570000e+04,   5.62600000e+03,   3.09700000e+03,
          1.96600000e+03,   1.29700000e+03,   8.93000000e+02,
          6.29000000e+02,   4.42000000e+02,   3.96000000e+02,
          2.73000000e+02,   2.54000000e+02,   1.74000000e+02,
          1.53000000e+02,   1.43000000e+02,   1.01000000e+02,
          7.70000000e+01,   8.10000000e+01,   6.30000000e+01,
          6.40000000e+01,   4.90000000e+01,   4.50000000e+01,
          3.80000000e+01,   3.80000000e+01,   3.10000000e+01,
          2.90000000e+01,   2.80000000e+01,   1.70000000e+01,
          2.20000000e+01,   1.50000000e+01,   1.40000000e+01,
          1.20000000e+01,   1.90000000e+01,   2.10000000e+01,
          1.00000000e+01,   1.10000000e+01,   1.20000000e+01,
          7.00000000e+00,   1.00000000e+01,   8.00000000e+00,
          8.00000000e+00,   9.00000000e+00,   1.10000000e+01,
          1.10000000e+01,   7.00000000e+00,   5.00000000e+00,
          3.00000000e+00]),
 array([  0,  20,  40,  60,  80, 100, 120, 140, 160, 180, 200, 220, 240,
        260, 280, 300, 320, 340, 360, 380, 400, 420, 440, 460, 480, 500,
        520, 540, 560, 580, 600, 620, 640, 660, 680, 700, 720, 740, 760,
        780, 800, 820, 840, 860, 880, 900, 920, 940, 960, 980]),
 <a list of 49 Patch objects>)

In [7]:
#plot out degree
outdegree = []
for vertex in g.vertices():
    outdegree.append(vertex.out_degree())
plt.hist(outdegree, np.arange(1,500,10), log = True)


Out[7]:
(array([  1.37930000e+05,   1.81997000e+05,   1.88491000e+05,
          1.52772000e+05,   1.05358000e+05,   6.18250000e+04,
          3.43840000e+04,   1.93530000e+04,   1.21570000e+04,
          8.02800000e+03,   5.55600000e+03,   4.07300000e+03,
          3.14300000e+03,   2.49200000e+03,   1.94900000e+03,
          1.54000000e+03,   1.18800000e+03,   9.34000000e+02,
          7.56000000e+02,   6.90000000e+02,   5.19000000e+02,
          3.97000000e+02,   3.38000000e+02,   2.68000000e+02,
          2.10000000e+02,   1.96000000e+02,   1.37000000e+02,
          1.32000000e+02,   8.90000000e+01,   8.60000000e+01,
          7.80000000e+01,   6.00000000e+01,   5.70000000e+01,
          3.30000000e+01,   3.70000000e+01,   4.20000000e+01,
          2.90000000e+01,   2.20000000e+01,   1.90000000e+01,
          2.00000000e+01,   9.00000000e+00,   1.30000000e+01,
          1.30000000e+01,   1.20000000e+01,   8.00000000e+00,
          8.00000000e+00,   6.00000000e+00,   6.00000000e+00,
          1.00000000e+01]),
 array([  1,  11,  21,  31,  41,  51,  61,  71,  81,  91, 101, 111, 121,
        131, 141, 151, 161, 171, 181, 191, 201, 211, 221, 231, 241, 251,
        261, 271, 281, 291, 301, 311, 321, 331, 341, 351, 361, 371, 381,
        391, 401, 411, 421, 431, 441, 451, 461, 471, 481, 491]),
 <a list of 49 Patch objects>)

In [8]:
# Let's plot its in-degree distribution
import graph_tool.stats as gtstats
in_hist = gtstats.vertex_hist(g, "in")

y = in_hist[0]
plt.figure(figsize=(10,6))
plt.errorbar(in_hist[1][:-1], in_hist[0], fmt="o", label="in")
plt.gca().set_yscale("log")
plt.gca().set_xlim(1, 1e3)
plt.gca().set_ylim(1.5,)
plt.xlabel("$k_{in}$")
plt.ylabel("$NP(k_{in})$")
plt.tight_layout()



In [11]:
#to save for web
# Let's plot its in-degree distribution
import graph_tool.stats as gtstats
in_hist = gtstats.vertex_hist(g, "in")

import matplotlib 
matplotlib.rc('xtick', labelsize=14) 
matplotlib.rc('ytick', labelsize=14)

y = in_hist[0]
plt.figure(figsize=(6,4))
plt.errorbar(in_hist[1][:-1], in_hist[0], fmt="o", label="in", alpha=0.5)
plt.gca().set_yscale("log")
plt.gca().set_xlim(1, 1e3)
plt.gca().set_ylim(1.5,)
plt.xlabel("Number of in-citations", fontsize=16)
plt.ylabel("Counts", fontsize=16)
plt.tight_layout()
plt.savefig('incites.svg')



In [7]:
# Let's plot its in-degree distribution
import graph_tool.stats as gtstats
in_hist = gtstats.vertex_hist(g, "out")

y = in_hist[0]
plt.figure(figsize=(10,6))
plt.errorbar(in_hist[1][:-1], in_hist[0], fmt="o", label="out")
plt.gca().set_yscale("log")
plt.gca().set_xlim(1, 600)
plt.gca().set_ylim(1.5,3e4)
plt.xlabel("$k_{in}$")
plt.ylabel("$NP(k_{in})$")
plt.tight_layout()



In [12]:
#to save for web
# Let's plot its in-degree distribution
import graph_tool.stats as gtstats
in_hist = gtstats.vertex_hist(g, "out")

import matplotlib 
matplotlib.rc('xtick', labelsize=14) 
matplotlib.rc('ytick', labelsize=14)

y = in_hist[0]
plt.figure(figsize=(6,4))
plt.errorbar(in_hist[1][:-1], in_hist[0], fmt="o", label="out", alpha = 0.5)
plt.gca().set_yscale("log")
plt.gca().set_xlim(1, 600)
plt.gca().set_ylim(1.5,3e4)
plt.xlabel("Number of out-citations", fontsize=16)
plt.ylabel("Counts", fontsize=16)
plt.tight_layout()
plt.savefig('outcites.svg')



In [28]:
g


Out[28]:
<Graph object, directed, with 7754378 vertices and 31231542 edges at 0x1154381d0>

Generate small graph for only a PMID's neighbors to a depth of two in both directions

initially, because it's much easier, only look at outgoing neighbors


In [59]:
rootpmid = 26247944
c.execute('''SELECT refpmid FROM refs WHERE pmid = ?''', [rootpmid])
output = c.fetchall()
print output


[(19464502,), (16130266,), (23897068,), (11719411,), (18241089,), (10739516,), (20502295,), (22441437,), (22495898,), (23362106,), (20664402,), (20166149,), (21386680,), (21520318,), (19777530,), (21257082,), (16374755,), (21439542,), (18817592,), (24150027,), (19516271,), (21474517,), (21185077,), (18991324,), (17322170,), (17071607,), (17368534,), (8405630,), (11844193,), (12420836,), (17899573,), (20232288,), (17149788,), (18332385,), (22753319,), (22846473,), (25136369,), (21115197,), (21295850,), (21254145,), (18480117,), (18259003,), (24752037,), (21389681,), (21210481,), (19656558,), (24970282,), (13680354,), (25653039,), (24959436,), (14764923,), (12618519,), (11943725,), (24166749,), (23331974,), (22906914,), (24444293,), (17553534,), (16957146,), (10077652,), (19215554,), (14678084,), (12648442,), (11932314,), (10875882,), (18771979,), (18655226,), (21252475,), (15280143,), (14710054,), (19497882,), (22036528,), (18259009,), (18753763,), (19035985,), (17618748,), (24607918,), (10849447,), (11095942,), (18264967,), (18978109,), (15994630,), (23980220,), (11040958,), (17227816,), (19436062,), (12000727,), (16647920,), (15257701,), (16102023,), (1372646,), (15118653,), (9065199,), (11239638,), (12624632,), (17365856,), (12049188,), (10471140,), (8572020,), (12422905,), (25280210,), (19879940,), (24185004,), (18166190,), (25028703,), (23300202,), (13956740,), (22544937,), (15343229,), (24103646,), (16877342,), (9931039,), (19602262,), (24852111,), (15867337,), (17322379,), (23526657,), (16224045,), (21093846,), (21079238,), (22438230,), (23830491,), (17346547,), (19285651,), (25981845,), (22364204,), (22251611,), (22852344,), (18218722,), (25346181,), (24356462,), (25122928,), (23890710,), (20598363,), (15572504,), (23280734,), (25530428,)]

In [51]:
for pmid in output:
    c.execute('''SELECT refpmid FROM refs WHERE pmid = ?''', [pmid[0]])
    print c.fetchall()


[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[(15733721,), (15947178,), (15743856,), (18259010,), (19541696,), (9725924,), (18480117,), (18259003,), (12796047,), (19087081,), (19369368,), (14662228,), (15760998,), (15924523,), (19516271,), (17443552,), (20687064,), (6668417,), (15094256,), (1393159,), (15856728,), (19155903,), (10925900,), (15387864,), (19477892,), (19336487,), (19502216,), (17556761,), (12193937,), (9396883,), (16098866,), (18475158,), (18725583,), (17903233,), (16169871,), (16157095,), (16678332,), (17975258,), (14520224,), (15042008,), (10521771,), (18819756,), (19586138,), (19273739,), (19813221,)]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[(19464502,), (18241089,), (11719411,), (12908998,), (20664402,), (22441437,), (22495898,), (22846473,), (22906914,), (24150027,), (19516271,), (6054057,), (3790464,), (1946241,), (11566964,), (12420836,), (11844193,), (17899573,), (16579806,), (22248988,), (15611362,), (9607693,), (10673728,), (15957992,), (7364175,), (7076094,), (3970084,), (2726114,), (8116715,), (8961572,), (9091016,), (9790376,), (9699746,), (10455476,), (11117771,), (10717813,), (10739512,), (11846705,), (12366481,), (14572364,), (15258539,), (20818956,), (18480117,), (24356462,), (7526035,), (10077652,), (12648442,), (14678084,), (19215554,), (11028579,), (11932314,), (12224070,), (15507981,), (16086443,), (17221926,), (17149788,), (12618519,), (15472115,), (14764923,), (17261644,), (11384693,), (11349200,), (11576575,), (12798535,), (15117604,), (14764795,), (18956425,), (18058842,), (18446652,), (18432600,), (22286035,), (19273739,), (21210481,), (24970282,), (15529289,), (22261192,), (21827221,), (24190934,), (23966056,), (24853452,), (24902880,), (21389681,)]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[(20482241,), (22018452,), (22267116,), (12908996,), (9494145,), (22102516,), (12618519,), (19850276,), (22000672,), (20232288,), (16098866,), (11932314,), (10875882,), (18771979,), (10940198,), (12850627,), (17299557,), (3105664,), (18058842,), (17212667,), (11932283,), (22542116,), (10967166,), (15743856,), (22647784,), (10077652,), (2662194,), (11844162,), (18332385,), (11197372,), (16202744,), (20150910,), (21867402,), (2200862,), (22986844,), (23557166,), (20818956,), (23929878,)]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[(15836870,), (15947178,), (15733721,), (17433431,), (17169180,), (17255351,), (18166303,), (8601931,), (12798543,), (17443552,), (17065044,), (16738957,), (17621696,), (16682385,), (12089065,), (15840747,), (12618519,), (2589440,), (18259009,), (19070896,), (18410657,), (12908998,), (13680354,), (8619929,), (8671397,), (9834463,), (8919031,), (11750869,), (10084962,), (9364601,), (9828203,), (15284729,), (15126581,), (15805796,), (9855593,), (14602804,), (16813756,), (17540809,), (17426082,), (15472115,), (15804781,), (14764923,), (12798535,), (14764795,), (11349200,), (11576575,), (18175241,), (17826405,), (16395263,), (15632339,), (16263826,), (17261644,), (18175243,), (1326540,), (7528549,), (12773481,), (15718503,), (16751767,), (18462791,), (17618745,), (16957146,), (17515455,), (17689640,), (18226617,), (18446652,), (15257701,), (16102023,), (1372646,), (15118653,), (9065199,), (11239638,), (12624632,), (17365856,), (12049188,), (12422905,), (8572020,), (10471140,), (17654753,), (11467696,), (17706337,), (9274585,), (15980640,), (15251949,), (18425486,), (12592259,), (11038466,), (12522563,), (12560345,), (18395872,), (16098864,), (9931039,), (12044339,), (15042003,), (18425484,), (15386446,), (15386451,), (11901066,), (16203989,), (17068167,), (17994635,), (17139698,), (9254906,), (14519395,), (16052496,), (9417060,), (16892462,), (15662668,), (14691998,), (17701664,), (17987603,), (15994630,), (18264967,), (18978109,), (6856484,), (11742106,), (15009185,), (10527825,), (19087972,), (18753763,), (17618748,), (17149788,), (15193867,), (19418482,), (18539259,), (16374755,), (17278173,), (12000727,), (1429570,), (7949102,), (16981714,), (18048494,), (16647920,), (16816069,), (10207505,), (11897673,), (10077652,), (11161967,), (15919757,), (17332520,), (17278174,), (15507981,), (15507982,), (11028579,), (11932314,), (17149788,), (18000943,), (17992705,), (14663836,), (12224070,), (6191321,), (13416279,), (15604363,), (17003355,), (16720654,), (17302880,), (16489932,), (18382866,), (17302880,), (8387282,), (10782362,), (12364390,), (16141406,), (18382699,), (14671210,), (10194466,), (18398593,), (16260641,), (15703421,), (18259044,), (19064815,), (10688503,), (18332385,), (18669736,), (18814185,), (18533121,), (16247621,), (12087083,), (16651850,), (16556680,), (15954879,), (17261642,), (14648175,), (18425489,), (12644709,), (15161720,), (15173234,), (12709362,), (14750173,), (18425492,), (18287768,), (17443845,), (16914840,), (17374602,), (18339795,), (18573524,), (18492697,)]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[(9274585,), (11106320,), (15993701,), (15060106,), (20014413,), (22752761,), (9931039,), (11148193,), (12044339,), (15251945,), (15042003,), (16020496,), (17191257,), (23466432,), (23021026,), (19631923,), (22544937,), (22552373,), (12592259,), (16832835,), (19121662,), (19816026,), (22871389,), (12423866,), (16098864,), (21915884,), (22711432,), (22717986,), (22836687,), (23187089,), (11719487,), (14500595,), (10486150,)]

In [60]:
def addedge(graphobject, source, dest, vertexdict):
    if source not in vertexdict:
        v = graphobject.add_vertex()
        vertexdict[source] = int(v)
    if dest not in vertexdict:
        v = graphobject.add_vertex()
        vertexdict[dest] = int(v)
    graphobject.add_edge(vertexdict[source], vertexdict[dest])
    return graphobject, vertexdict

In [65]:
minigraph = gt.Graph()
rootpmid = 26247944
c.execute('''SELECT refpmid FROM refs WHERE pmid = ?''', [rootpmid])
children = c.fetchall()
vertexdict = dict()
for child in children:
    minigraph, vertexdict = addedge(minigraph, rootpmid, child[0], vertexdict)
    c.execute('''SELECT refpmid FROM refs WHERE pmid = ?''', [child[0]])
    secondchildren = c.fetchall()
    for child2 in secondchildren:
        minigraph, vertexdict = addedge(minigraph, child[0], child2[0], vertexdict)

In [78]:
minigraph


Out[78]:
<Graph object, directed, with 438 vertices and 531 edges at 0x116daa7d0>

In [212]:
import graph_tool.all as gt
import math

In [213]:
deg = minigraph.degree_property_map("in")
#deg.a = 4 * (math.sqrt(deg.a) * 0.5 + 0.4)
gt.graph_draw(minigraph, vertex_fill_color=deg)


Out[213]:
<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x116daa7d0, at 0x116804090>

In [ ]:


In [ ]:
# CO-AUTHORSHIP NETWORK???
# https://graph-tool.skewed.de/static/doc/draw.html

In [ ]:

rewrite above without slow sql commands using graph network


In [150]:
rev_pmid_vertex_dict = {v: k for k, v in pmid_vertex_dict.items()}

In [202]:
def addedge(graphobject, source, dest, vertexdict):
    if source not in vertexdict:
        v = graphobject.add_vertex()
        vertexdict[source] = int(v)
    if dest not in vertexdict:
        v = graphobject.add_vertex()
        vertexdict[dest] = int(v)
    graphobject.add_edge(vertexdict[source], vertexdict[dest])
    return graphobject, vertexdict

In [258]:
rootpmid = 26247944
minigraph2 = gt.Graph()
vertexdict2 = dict()
for neigh in g.vertex(pmid_vertex_dict[rootpmid]).out_neighbours(): 
    minigraph2, vertexdict2 = addedge(minigraph2, rootpmid, neigh, vertexdict2)
    try:
        for neigh2 in g.vertex(neigh).out_neighbours():
            minigraph2, vertexdict2 = addedge(minigraph2, neigh, neigh2, vertexdict2)
    except KeyError:
        print "{} not in graph g".format(neigh)

In [259]:
deg = minigraph2.degree_property_map("in")
gt.graph_draw(minigraph2, vertex_fill_color=deg)


Out[259]:
<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x116ada850, at 0x117640950>

In [273]:
deg = minigraph2.degree_property_map("in")
deg.a = 2 * (np.sqrt(deg.a) * 0.5 + 0.4)
ebet = gt.betweenness(minigraph2)[1]
### for inline support
from IPython.display import SVG, display
###
gt.graphviz_draw(minigraph2, vcolor=deg, vorder=deg, elen=10, ecolor=ebet, 
                 eorder=ebet, output = "/tmp/3961631087.svg", output_format="svg",
                size = (25,25))
display(SVG(filename = "/tmp/3961631087.svg"))


G 0--2 0--3 0--6 0--7 0--10 0--12 0--13 0--14 0--15 0--16 0--18 0--19 0--22 0--67 0--68 0--69 0--70 0--71 0--72 0--79 0--81 0--149 0--150 0--151 0--152 0--153 0--155 0--156 0--157 0--158 0--159 0--160 0--190 0--192 0--193 0--194 0--195 0--196 0--197 0--200 0--202 0--203 0--204 0--208 0--209 0--210 0--211 0--226 0--227 0--228 0--229 0--230 0--231 0--232 0--234 0--235 0--236 0--238 0--389 0--390 0--391 0--392 0--393 0--394 0--395 0--396 0--397 0--398 0--399 0--400 0--401 0--402 0--403 0--404 0--405 0--406 0--407 0--408 0--409 0--410 0--411 0--1 0--4 0--5 0--8 0--9 0--11 0--17 0--20 0--21 0--29 0--30 0--73 0--74 0--75 0--76 0--77 0--78 0--80 0--83 0--112 0--114 0--115 0--116 0--117 0--119 0--124 0--126 0--139 0--140 0--148 0--154 0--169 0--170 0--191 0--198 0--199 0--201 0--205 0--206 0--207 0--212 0--213 0--214 0--215 0--216 0--217 0--218 0--219 0--220 0--221 0--222 0--223 0--224 0--225 0--233 0--237 22--26 22--27 22--28 22--31 22--32 22--33 22--34 22--35 22--36 22--38 22--39 22--40 22--41 22--42 22--43 22--44 22--45 22--46 22--47 22--48 22--49 22--50 22--51 22--53 22--54 22--55 22--56 22--57 22--58 22--59 22--60 22--61 22--62 22--63 22--64 22--66 22--21 22--23 22--24 22--25 22--29 22--30 22--37 22--52 22--65 81--84 81--85 81--86 81--87 81--88 81--89 81--90 81--91 81--92 81--93 81--94 81--95 81--96 81--97 81--98 81--99 81--100 81--101 81--102 81--103 81--104 81--105 81--106 81--107 81--108 81--109 81--110 81--113 81--122 81--123 81--128 81--132 81--134 81--137 81--138 81--141 81--142 81--143 81--144 81--145 81--146 81--147 81--1 81--4 81--5 81--8 81--9 81--11 81--20 81--21 81--29 81--65 81--73 81--74 81--75 81--77 81--80 81--82 81--83 81--111 81--112 81--114 81--115 81--116 81--117 81--118 81--119 81--120 81--121 81--124 81--125 81--126 81--127 81--129 81--130 81--131 81--133 81--135 81--136 81--139 81--140 81--148 160--161 160--162 160--163 160--164 160--165 160--166 160--167 160--168 160--171 160--172 160--173 160--174 160--175 160--176 160--177 160--178 160--179 160--180 160--181 160--182 160--183 160--184 160--185 160--186 160--187 160--188 160--189 160--25 160--52 160--76 160--78 160--111 160--114 160--119 160--124 160--135 160--169 160--170 238--239 238--240 238--241 238--242 238--243 238--244 238--245 238--246 238--247 238--248 238--249 238--250 238--251 238--252 238--253 238--254 238--255 238--256 238--257 238--258 238--259 238--260 238--261 238--262 238--263 238--264 238--265 238--266 238--267 238--268 238--269 238--270 238--271 238--272 238--273 238--274 238--275 238--276 238--277 238--278 238--279 238--280 238--281 238--282 238--283 238--284 238--285 238--286 238--287 238--288 238--289 238--290 238--292 238--293 238--294 238--296 238--297 238--298 238--299 238--303 238--304 238--305 238--306 238--307 238--308 238--309 238--310 238--311 238--312 238--313 238--314 238--315 238--316 238--317 238--318 238--319 238--320 238--321 238--322 238--323 238--324 238--325 238--326 238--327 238--328 238--329 238--330 238--331 238--332 238--333 238--334 238--335 238--336 238--337 238--338 238--339 238--340 238--341 238--342 238--343 238--344 238--345 238--346 238--347 238--348 238--350 238--351 238--352 238--353 238--354 238--355 238--356 238--357 238--358 238--359 238--360 238--361 238--362 238--363 238--364 238--365 238--366 238--367 238--368 238--369 238--370 238--371 238--372 238--373 238--374 238--375 238--376 238--377 238--378 238--379 238--380 238--381 238--382 238--383 238--384 238--385 238--386 238--387 238--388 238--17 238--23 238--24 238--37 238--77 238--77 238--78 238--82 238--114 238--118 238--119 238--120 238--121 238--124 238--125 238--126 238--127 238--129 238--130 238--131 238--133 238--136 238--154 238--191 238--198 238--199 238--201 238--205 238--206 238--207 238--212 238--213 238--214 238--215 238--216 238--217 238--218 238--219 238--220 238--221 238--222 238--223 238--224 238--225 238--237 238--291 238--295 238--300 238--301 238--302 238--349 238--349 411--412 411--413 411--414 411--415 411--416 411--417 411--418 411--419 411--420 411--421 411--422 411--423 411--424 411--425 411--426 411--427 411--428 411--429 411--430 411--431 411--432 411--433 411--434 411--435 411--436 411--437 411--233 411--237 411--291 411--295 411--300 411--301 411--302 0 2 3 6 7 10 12 13 14 15 16 18 19 22 26 27 28 31 32 33 34 35 36 38 39 40 41 42 43 44 45 46 47 48 49 50 51 53 54 55 56 57 58 59 60 61 62 63 64 66 67 68 69 70 71 72 79 81 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 113 122 123 128 132 134 137 138 141 142 143 144 145 146 147 149 150 151 152 153 155 156 157 158 159 160 161 162 163 164 165 166 167 168 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 192 193 194 195 196 197 200 202 203 204 208 209 210 211 226 227 228 229 230 231 232 234 235 236 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 292 293 294 296 297 298 299 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 1 4 5 8 9 11 17 20 21 23 24 25 29 30 37 52 65 73 74 75 76 77 78 80 82 83 111 112 114 115 116 117 118 119 120 121 124 125 126 127 129 130 131 133 135 136 139 140 148 154 169 170 191 198 199 201 205 206 207 212 213 214 215 216 217 218 219 220 221 222 223 224 225 233 237 291 295 300 301 302 349

In [214]:
pos = draw.arf_layout(minigraph2)
gt.graph_draw(minigraph2, pos=pos)


Out[214]:
<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x1167a76d0, at 0x1185fc590>

In [215]:
pos = draw.sfdp_layout(minigraph2, cooling_step=0.95)
gt.graph_draw(minigraph2, pos=pos)


Out[215]:
<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x1167a76d0, at 0x1185fca90>

In [196]:
pos = draw.arf_layout(minigraph2)
deg = minigraph2.degree_property_map("in")
gt.graph_draw(minigraph2, pos=pos, vertex_fill_color=deg)


Out[196]:
<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x117703d50, at 0x116aae450>

In [219]:
state = gt.minimize_nested_blockmodel_dl(minigraph2, deg_corr=True)
gt.draw_hierarchy(state)


/usr/local/lib/python2.7/site-packages/graph_tool/draw/cairo_draw.py:619: UserWarning: Unknown parameter: layout_callback
  warnings.warn("Unknown parameter: " + k, UserWarning)
/usr/local/lib/python2.7/site-packages/graph_tool/draw/cairo_draw.py:619: UserWarning: Unknown parameter: key_press_callback
  warnings.warn("Unknown parameter: " + k, UserWarning)
Out[219]:
(<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x1167a76d0, at 0x117640190>,
 <Graph object, directed, with 439 vertices and 438 edges at 0x117654e10>,
 <PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x117654e10, at 0x117648050>)

In [217]:
gt.graph_draw(minigraph2, pos=draw.sfdp_layout(minigraph2, cooling_step=0.99),
              vertex_fill_color=minigraph2.vertex_index, vertex_size=2,
              edge_pen_width=1)


Out[217]:
<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x1167a76d0, at 0x1185fce50>

In [238]:
# LOOK INTO CO-AUTHORSHIP NETWORKS
# https://graph-tool.skewed.de/static/doc/draw.html
# http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2721762/
g = minigraph2
g.purge_vertices()
state = gt.minimize_nested_blockmodel_dl(g, deg_corr=True)
t = gt.get_hierarchy_tree(state)[0]
tpos = pos = gt.radial_tree_layout(t, t.vertex(t.num_vertices() - 1), weighted=True)
cts = gt.get_hierarchy_control_points(g, t, tpos)
pos = g.own_property(tpos)
b = state.levels[0].b
shape = b.copy()
shape.a %= 14
gt.graph_draw(g, pos=pos, vertex_fill_color=b, vertex_shape=shape, edge_control_points=cts,
              edge_color=[0, 0, 0, 0.3], vertex_anchor=0)


Out[238]:
<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x1167a76d0, at 0x11770ce50>

In [242]:
deg = minigraph2.degree_property_map("in")
deg.a = 4 * (np.sqrt(deg.a) * 0.5 + 0.4)
ebet = gt.betweenness(minigraph2)[1]
ebet.a /= ebet.a.max() / 10.
eorder = ebet.copy()
eorder.a *= -1
pos = gt.sfdp_layout(minigraph2)
control = g.new_edge_property("vector<double>")
for e in g.edges():
    d = np.sqrt(sum((pos[e.source()].a - pos[e.target()].a) ** 2)) / 5
    control[e] = [0.3, d, 0.7, d]
gt.graph_draw(minigraph2, pos=pos, vertex_size=deg, vertex_fill_color=deg, vorder=deg,
              edge_color=ebet, eorder=eorder, edge_pen_width=ebet,
              edge_control_points=control)
# SFDP force-directed layout of a Price network with 1500 nodes. The vertex size and color indicate the degree, 
# and the edge color and width the edge betweenness centrality.
# https://graph-tool.skewed.de/static/doc/draw.html


Out[242]:
<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x1167a76d0, at 0x116adaa10>

In [218]:
gt.graph_draw(minigraph2, pos=draw.sfdp_layout(minigraph2, cooling_step=0.99),
              vertex_fill_color=minigraph2.vertex_index, vertex_size=2,
              edge_pen_width=1)


Out[218]:
<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x1167a76d0, at 0x1185fce90>

In [ ]:

will make sense to trim down dataset to papers that have interesting citation networks within PMC, since many papers do not! This is a reasonable excuse for this, will also shrink data to make it more managable for dynamic plotting on the webserver. Can I make a statement about liklihood of being in PMC open access by field? Keyword?

Rather than using graph network for dynamic portion of website, to save resources, I can precompute in-neighbors and out-neighbors and save into a sql table and just call these. Alo, in terms of excluding papers that aren't interesting (not in PMC) - rather than doing this, perhaps just don't have them autocomplete in the search window and/or de-rank them in results? Could also color them differently in the graph networks (e.g. red) to indicate that the network terminates there, without specifying why (not in PMC vs too new)?


In [ ]:

to 3 degrees


In [245]:
rootpmid = 26247944
minigraph2 = gt.Graph()
vertexdict2 = dict()
for neigh in g.vertex(pmid_vertex_dict[rootpmid]).out_neighbours(): 
    minigraph2, vertexdict2 = addedge(minigraph2, rootpmid, neigh, vertexdict2)
    try:
        for neigh2 in g.vertex(neigh).out_neighbours():
            minigraph2, vertexdict2 = addedge(minigraph2, neigh, neigh2, vertexdict2)
            try:
                for neigh3 in g.vertex(neigh2).out_neighbours():
                    minigraph2, vertexdict2 = addedge(minigraph2, neigh2, neigh3, vertexdict2)
            except KeyError:
                    print "3rd degree node {} not in graph g".format(neigh)
    except KeyError:
        print "2nd degree node {} not in graph g".format(neigh)

In [247]:
deg = minigraph2.degree_property_map("in")
gt.graph_draw(minigraph2, vertex_fill_color=deg)


Out[247]:
<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x116ada450, at 0x1167a7210>

to n degrees


In [402]:
def addedge(graphobject, source, dest, vertexdict):
    if source not in vertexdict:
        v = graphobject.add_vertex()
        vertexdict[source] = int(v)
    if dest not in vertexdict:
        v = graphobject.add_vertex()
        vertexdict[dest] = int(v)
    graphobject.add_edge(vertexdict[source], vertexdict[dest])
    return graphobject, vertexdict

#def addlayer(graphobject, rootnode, vertexdict, direction = 'out'):
#    if direction == 'out':
#        for neigh in graphobject.vertex(rootnode).out_neighbors():
#            graphobject, vertexdict = addedge(graphobject, rootnode, neigh, vertexdict)
#    elif direction == 'in':
#        for neigh in graphobject.vertex(rootnode).in_neighbors():
#            graphobject, vertexdict = addedge(graphobject, rootnode, neigh, vertexdict)
#    else:
#        print "Specify direction as either in or out"
       
import Queue
import graph_tool.all as gt
def buildlocalgraph(rootnode, mastergraph, indepth = 0, outdepth = 2):
    _g =gt.Graph()
    _vertexdict = dict()
    q = Queue.Queue()
    q.put((rootnode, 0))
    #first go in out direction
    while not q.empty():
        node = q.get()
        if node[1] <= outdepth:
            try:
                for neigh in _g.vertex(node[0]).out_neighbors():
                    _g, _vertexdict = addedge(_g, node[0], neigh, _vertexdict)
            except KeyError:
                "{} degree node {} not in graph g".format(node[1], node[0])
                
def buildlocalgraph(rootnode, mastergraph, indepth = 0, outdepth = 2):
    _g =gt.Graph()
    _vertexdict = dict()
    q = Queue.Queue()
    #first go in out direction
    q.put((rootnode, 0))
    while not q.empty():
        node = q.get()
        if node[1] < outdepth:
            try:
                for neigh in mastergraph.vertex(node[0]).out_neighbours():
                    _g, _vertexdict = addedge(_g, node[0], neigh, _vertexdict)
                    q.put((neigh, node[1]+1))
            except KeyError:
                "{} degree node {} not in graph g".format(node[1], node[0])
    #now go in in direction
    q.put((rootnode, 0))
    while not q.empty():
        node = q.get()
        if node[1] < indepth:
            try:
                for neigh in mastergraph.vertex(node[0]).in_neighbours():
                    _g, _vertexdict = addedge(_g, neigh, node[0], _vertexdict)
                    q.put((neigh, node[1]+1))
            except KeyError:
                "{} degree node {} not in graph g".format(node[1], node[0])
    return _g, _vertexdict

In [343]:
rootpmid = 26247944
minigraph3, vertexdict3 = buildlocalgraph(pmid_vertex_dict[rootpmid], g, 2, 0)


(5690856, 0)

In [383]:
def countoutneigh(g, node):
    count = 0
    for neigh in g.vertex(node).out_neighbours(): count+= 1
    return count

def countinneigh(g, node):
    count = 0
    for neigh in g.vertex(node).in_neighbours(): count+= 1
    return count

In [384]:
countoutneigh(g, 5690856)


Out[384]:
137

In [389]:
for neigh in g.vertex(5690856).out_neighbours():
    if countoutneigh(g, neigh) > 0: print countoutneigh(g, neigh), neigh


45 973846
82 1697637
38 2397896
196 2399401
33 5690874

In [393]:
countinneigh(g, 2399401)


Out[393]:
12

In [398]:
for i in range(2399400, 2399402):
    print countinneigh(g, i)


1
12

In [439]:
minigraph3, vertexdict3 = buildlocalgraph(2399401, g, 10, 10)

In [440]:
deg = minigraph3.degree_property_map("in")
gt.graph_draw(minigraph3, vertex_fill_color=deg, output_size=(1200,1200))


Out[440]:
<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x117162ad0, at 0x10e7d3590>

In [418]:
minigraph3.purge_vertices()
state = gt.minimize_nested_blockmodel_dl(minigraph3, deg_corr=True)
t = gt.get_hierarchy_tree(state)[0]
tpos = pos = gt.radial_tree_layout(t, t.vertex(t.num_vertices() - 1), weighted=True)
cts = gt.get_hierarchy_control_points(minigraph3, t, tpos)
pos = minigraph3.own_property(tpos)
b = state.levels[0].b
shape = b.copy()
shape.a %= 14
gt.graph_draw(minigraph3, pos=pos, vertex_fill_color=b, vertex_shape=shape, edge_control_points=cts,
              edge_color=[0, 0, 0, 0.3], vertex_anchor=0)


Out[418]:
<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x1185fcc10, at 0x11765f190>

In [421]:
minigraph3, vertexdict3 = buildlocalgraph(2399401, g, 10, 10)
state = gt.minimize_nested_blockmodel_dl(minigraph3, deg_corr=True)
gt.draw_hierarchy(state)


Out[421]:
(<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x1172882d0, at 0x1173d0110>,
 <Graph object, directed, with 601 vertices and 600 edges at 0x117288d10>,
 <PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x117288d10, at 0x117288cd0>)

In [451]:
import math

g = minigraph3
print(g.num_vertices(), g.num_edges())

#reduce to only connected nodes
g = gt.GraphView(g,vfilt=lambda v: (v.out_degree() > 0) and (v.in_degree() > 0) )
g.purge_vertices()

print(g.num_vertices(), g.num_edges())

#use 1->Republican, 2->Democrat
red_blue_map = {1:(1,0,0,1),0:(0,0,1,1)}
plot_color = g.new_vertex_property('vector<double>')
g.vertex_properties['plot_color'] = plot_color
#for v in g.vertices():
#    plot_color[v] = red_blue_map[g.vertex_properties['value'][v]]  ######### USE THIS FOR GENDER??

#### THIS WAS USED FOR RED/BLUE POLITICAL AFIL
### SEE: http://stackoverflow.com/questions/238724/visualizing-undirected-graph-thats-too-large-for-graphviz

#edge colors
alpha=0.15
edge_color = g.new_edge_property('vector<double>')
g.edge_properties['edge_color']=edge_color
for e in g.edges():
    if plot_color[e.source()] != plot_color[e.target()]:
        if plot_color[e.source()] == (0,0,1,1):
            #orange on dem -> rep
            edge_color[e] = (255.0/255.0, 102/255.0, 0/255.0, alpha)
        else:
            edge_color[e] = (102.0/255.0, 51/255.0, 153/255.0, alpha)            
    #red on rep-rep edges
    elif plot_color[e.source()] == (1,0,0,1):
        edge_color[e] = (1,0,0, alpha)
    #blue on dem-dem edges
    else:
        edge_color[e] = (0,0,1, alpha)

state = gt.minimize_nested_blockmodel_dl(g, deg_corr=True)
bstack = state.get_bstack()
#t = gt.get_hierarchy_tree(bstack)[0]
tpos = pos = gt.radial_tree_layout(t, t.vertex(t.num_vertices() - 1), weighted=True)
cts = gt.get_hierarchy_control_points(g, t, tpos)
pos = g.own_property(tpos)
b = bstack[0].vp["b"]

#labels
text_rot = g.new_vertex_property('double')
g.vertex_properties['text_rot'] = text_rot
for v in g.vertices():
    if pos[v][0] >0:
        text_rot[v] = math.atan(pos[v][1]/pos[v][0])
    else:
        text_rot[v] = math.pi + math.atan(pos[v][1]/pos[v][0])

gt.graph_draw(g, pos=pos, vertex_fill_color=g.vertex_properties['plot_color'], 
            vertex_color=g.vertex_properties['plot_color'],
            edge_control_points=cts,
            vertex_size=10,
            #vertex_text=g.vertex_properties['label'],
            vertex_text_rotation=g.vertex_properties['text_rot'],
            vertex_text_position=1,
            vertex_font_size=9,
            edge_color=g.edge_properties['edge_color'],
            vertex_anchor=0,
            #bg_color=[0,0,0,1],
            output_size=[4024,4024])
            #output='polblogs_blockmodel.png')


(584, 966)
(152, 273)
Out[451]:
<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x11728fc50, at 0x116653090>

FIND "GOOD" PAPER WITH MANY IN AND OUT CITES


In [452]:
import cPickle as pickle
g = pickle.load(open("full_graph.p", "rb"))
pmid_vertex_dict = pickle.load(open("full_graph_pmid_vertex_dict.p", "rb"))
rev_pmid_vertex_dict = {v: k for k, v in pmid_vertex_dict.items()}

In [461]:
for node in range(5000):
    inn = countinneigh(g,node)
    outn = countoutneigh(g, node)
    if (inn > 30) & (outn > 30): print node, rev_pmid_vertex_dict[node]


48 20502679
54 18803808
195 21042317
259 19783989
332 24304892
597 19874578
734 15933209
939 20804570
999 17211480
1000 16404366
1028 20964822
1042 19582160
1043 21884641
1130 21460848
1184 18349830
1214 22847613
1407 22821563
1514 20601955
1521 15535852
1880 16538219
1951 21917858
2059 18981049
2068 17088286
2218 17493263
2220 16643655
2232 20813035
2669 23700391
2696 23034130
2783 16948836
2798 19934210
3021 19621072
3223 20615901
3229 21423178
3230 19002258
3538 18682804
3705 23000897
3754 21247443
4603 21097890
4811 22006249
4820 16563223
4908 16751849

In [473]:
rootpmid = 20502679
minigraph3, vertexdict3 = buildlocalgraph(pmid_vertex_dict[rootpmid], g, 2, 5)
deg = minigraph3.degree_property_map("out") #out AND in how?
gt.graph_draw(minigraph3, vertex_fill_color=deg, output_size=(1200,1200))


Out[473]:
<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x116d10410, at 0x116602350>

In [477]:
minigraph3, vertexdict3 = buildlocalgraph(pmid_vertex_dict[rootpmid], g, 5, 5)
minigraph3.purge_vertices()
state = gt.minimize_nested_blockmodel_dl(minigraph3, deg_corr=True)
t = gt.get_hierarchy_tree(state)[0]
tpos = pos = gt.radial_tree_layout(t, t.vertex(t.num_vertices() - 1), weighted=True)
cts = gt.get_hierarchy_control_points(minigraph3, t, tpos)
pos = minigraph3.own_property(tpos)
b = state.levels[0].b
shape = b.copy()
shape.a %= 14
gt.graph_draw(minigraph3, pos=pos, vertex_fill_color=b, vertex_shape=shape, edge_control_points=cts,
              edge_color=[0, 0, 0, 0.3], vertex_anchor=0)


Out[477]:
<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x117410cd0, at 0x1164cc210>

In [476]:
minigraph3, vertexdict3 = buildlocalgraph(pmid_vertex_dict[rootpmid], g, 5, 5)
state = gt.minimize_nested_blockmodel_dl(minigraph3, deg_corr=True)
gt.draw_hierarchy(state)


Out[476]:
(<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x117410650, at 0x1174a2c10>,
 <Graph object, directed, with 2319 vertices and 2318 edges at 0x116d10e50>,
 <PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x116d10e50, at 0x11653e050>)

NEXT: CO-AUTHORSHIP / COLLABORATION NETWORKS (e.g. can click author name on paper and see network info), TAG PREDICTIONS, SEARCH ENGINE (limited to interesting papers with good nearby citation network counts?), "Self cite index" to apply to authors (and "adjusted" H-Index?), by gender self cites

first labels


In [2]:
def addedge(graphobject, source, dest, vertexdict, v_label):
    if source not in vertexdict:
        v = graphobject.add_vertex()
        vertexdict[source] = int(v)
        v_label[v] = str(rev_pmid_vertex_dict[dest])
    if dest not in vertexdict:
        v = graphobject.add_vertex()
        vertexdict[dest] = int(v)
        v_label[v] = str(rev_pmid_vertex_dict[dest])
    graphobject.add_edge(vertexdict[source], vertexdict[dest])
    return graphobject, vertexdict, v_label
       
import Queue
import graph_tool.all as gt
                
def buildlocalgraph(rootnode, mastergraph, indepth = 0, outdepth = 2):
    _g =gt.Graph()
    _vertexdict = dict()
    q = Queue.Queue()
    v_label = _g.new_vertex_property("string")
    #first go in out direction
    q.put((rootnode, 0))
    while not q.empty():
        node = q.get()
        if node[1] < outdepth:
            try:
                for neigh in mastergraph.vertex(node[0]).out_neighbours():
                    _g, _vertexdict, v_label = addedge(_g, node[0], neigh, _vertexdict, v_label)
                    q.put((neigh, node[1]+1))
            except KeyError:
                print "{} degree node {} not in graph g".format(node[1], node[0])
    #now go in in direction
    q.put((rootnode, 0))
    while not q.empty():
        node = q.get()
        if node[1] < indepth:
            try:
                for neigh in mastergraph.vertex(node[0]).in_neighbours():
                    _g, _vertexdict, v_label = addedge(_g, neigh, node[0], _vertexdict, v_label)
                    q.put((neigh, node[1]+1))
            except KeyError:
                "{} degree node {} not in graph g".format(node[1], node[0])
    return _g, _vertexdict, v_label

In [542]:
rootpmid = 20502679
#rev_pmid_vertex_dict = {v: k for k, v in pmid_vertex_dict.items()}
minigraph3, vertexdict3, v_label = buildlocalgraph(pmid_vertex_dict[rootpmid], g, 0, 2)
deg = minigraph3.degree_property_map("out") #out AND in how?
gt.graph_draw(minigraph3, vertex_fill_color=deg, vertex_text=v_label, output_size=(1200,1200))


Out[542]:
<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x116aa7190, at 0x116ada250>

In [544]:
rootpmid = 20502679
#rev_pmid_vertex_dict = {v: k for k, v in pmid_vertex_dict.items()}
minigraph3, vertexdict3, v_label = buildlocalgraph(pmid_vertex_dict[rootpmid], g, 2, 2)
deg = minigraph3.degree_property_map("out") #out AND in how?
gt.graph_draw(minigraph3, vertex_fill_color=deg, output_size=(1200,1200))


Out[544]:
<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x116aa7410, at 0x117162b90>

TF-IDF


In [587]:
import collections
import nltk
import string
import nltk.stem.porter

def stem_tokens(tokens, stemmer):
    stemmed = []
    for item in tokens:
        stemmed.append(stemmer.stem(item))
    return stemmed

tokens = map(string.lower, nltk.word_tokenize(teststring))
stemmer = nltk.stem.porter.PorterStemmer()
stemmed = stem_tokens(tokens, stemmer)
count = collections.Counter(stemmed)
print count.most_common(100)


[(u',', 6), (u'we', 5), (u'.', 5), (u'the', 5), (u'in', 4), (u'it', 3), (u'and', 2), (u'then', 2), (u'text', 2), (u'tfidfvector', 2), (u'to', 2), (u"'s", 2), (u'a', 2), (u'word', 2), (u'remov', 2), (u'first', 2), (u'shakespear', 1), (u'term', 1), (u'creat', 1), (u'pass', 1), (u'an', 1), (u'own', 1), (u'through', 1), (u'tf-idf', 1), (u'file', 1), (u'our', 1), (u'articl', 1), (u'use', 1), (u'built', 1), (u"'", 1), (u'few', 1), (u'perform', 1), (u'next', 1), (u'call', 1), (u'which', 1), (u'input', 1), (u':', 1), (u'function', 1), (u'custom', 1), (u'rather', 1), (u'everi', 1), (u'that', 1), (u'each', 1), (u'stop', 1), (u"'known", 1), (u'initi', 1), (u'stem', 1), (u'lowercas', 1), (u'base', 1), (u'but', 1), (u'dictionari', 1), (u'particular', 1), (u'thing', 1), (u'given', 1), (u'than', 1), (u'on', 1), (u'convert', 1), (u'found', 1), (u'for', 1), (u'of', 1), (u'scikit-learn', 1), (u'punctuat', 1), (u'iter', 1), (u'doe', 1), (u'collect', 1), (u'token', 1), (u'calcul', 1), (u'fit_transform', 1), (u'nltk', 1)]

In [611]:
rootpmid = 20502679
c.execute('''SELECT abstract FROM abstracts WHERE pmid > ? AND pmid < ?''', [rootpmid-100, rootpmid+100])
corpus = c.fetchall()
print len(corpus)
#corpusflat = ['' + corpus[i] for i in range(len(corpus))]
for i, entry in enumerate(corpus):
    corpus[i] = str(corpus[i])


79

In [612]:
from sklearn.feature_extraction.text import TfidfVectorizer
tf = TfidfVectorizer(analyzer='word', ngram_range=(1,3), min_df = 0, stop_words = 'english')
tfidf_matrix =  tf.fit_transform(corpus)
feature_names = tf.get_feature_names() 
len(feature_names)


Out[612]:
26168

In [613]:
tfidf_matrix


Out[613]:
<79x26168 sparse matrix of type '<type 'numpy.float64'>'
	with 30583 stored elements in Compressed Sparse Row format>

In [615]:
episode = corpus[0]
phrase_scores = [pair for pair in zip(range(0, len(episode)), episode) if pair[1] > 0]
sorted(phrase_scores, key=lambda t: t[1] * -1)[:5]


Out[615]:
[(0, '('), (1, 'u'), (2, "'"), (3, 'O'), (4, 'v')]

In [628]:
dense = tfidf_matrix.todense()
episode = dense[1].tolist()[0]
phrase_scores = [pair for pair in zip(range(0, len(episode)), episode) if pair[1] > 0]
top10 = sorted(phrase_scores, key=lambda t: t[1] * -1)[:5]
for entry in top10:
    print feature_names[entry[0]]


dm1
clcn1
dmpk
splicing
mis

NEXT: run on full corpus, write predictor that takes PMID as an argument, save text to new sql table

THIS TAKES TOO MUCH RAM (>36GB), SEE SECOND APPROACH BELOW


In [634]:
c.execute('''SELECT pmid, abstract FROM abstracts''')
corpus = c.fetchall()
print len(corpus)
corpuspmid = []
#corpusflat = ['' + corpus[i] for i in range(len(corpus))]
for i, entry in enumerate(corpus):
    corpuspmid.append(corpus[i][0])
    corpus[i] = unicode(corpus[i][1])


927541

In [ ]:
from sklearn.feature_extraction.text import TfidfVectorizer
tf = TfidfVectorizer(analyzer='word', ngram_range=(1,3), min_df = 0, stop_words = 'english')
tfidf_matrix =  tf.fit_transform(corpus)
feature_names = tf.get_feature_names() 
len(feature_names)

In [ ]:
article = corpus[0]
phrase_scores = [pair for pair in zip(range(0, len(episode)), episode) if pair[1] > 0]
sorted(phrase_scores, key=lambda t: t[1] * -1)[:5]

SECOND APPROACH SEE SECOND NOTEBOOK 270616 gensim TFIDF


In [ ]:
import sqlite3
conn = sqlite3.connect('pmcv1-full.db')
c = conn.cursor()
c.execute('''SELECT pmid, abstract FROM abstracts''')
corpus = c.fetchall()
print len(corpus)
corpuspmid = []
for i, entry in enumerate(corpus):
    corpuspmid.append(corpus[i][0])
    corpus[i] = unicode(corpus[i][1])

In [ ]:
import gensim
testcorpus = []
for i in range(1000):
    testcorpus.append(corpus[i])

In [ ]:


In [ ]:


In [ ]:

Co-author networks

See: http://www-personal.umich.edu/~mejn/papers/cnlspre.pdf Initially, what I want to do is


In [1]:
import sqlite3
conn = sqlite3.connect('pmcv1-full.db')
c = conn.cursor()

In [2]:
rootpmid = 20502679
c.execute('''SELECT pmid, fn, ln FROM authors WHERE pmid > ? AND pmid < ?''', [rootpmid-100, rootpmid+100])
authors = c.fetchall()
print len(authors)


595

In [11]:
#fullnames:
authors[i][1]+authors[i][2]


Out[11]:
u'NaihaoYe'

In [3]:
#import graph_tool.all as gt
import graph_tool as gt

In [5]:
import hashlib
def md5hash(string):
    return hashlib.md5(string).hexdigest()

def addedge(graphobject, source, dest, vertexdict):
    if source not in vertexdict:
        v = graphobject.add_vertex()
        vertexdict[source] = int(v)
    if dest not in vertexdict:
        v = graphobject.add_vertex()
        vertexdict[dest] = int(v)
    graphobject.add_edge(vertexdict[source], vertexdict[dest])
    return graphobject, vertexdict

In [6]:
import itertools
g = gt.Graph(directed = False)
author_vertex_dict = dict()
author_full_name_dict = dict()
c.execute('''SELECT pmid, fn, ln FROM authors WHERE pmid > ? AND pmid < ?''', [rootpmid-100, rootpmid+100])
authors = c.fetchall()
authorspaper = []
currpaper = authors[0][0]
for entry in authors:
    #accumulate by paper
    authorcat = unicode(entry[1]+entry[2]).replace(" ", "").lower()
    authorspaper.append(authorcat)
    author_full_name_dict[authorcat] = (entry[1],entry[2])
    if entry[0] != currpaper:
        #add author nodes and edges
        for comb in itertools.combinations(authorspaper, 2):
            addedge(g, comb[0], comb[1], author_vertex_dict)
        #reset and begin accumulating again
        currpaper = entry[0]
        authorspaper = []

In [7]:
g


Out[7]:
<Graph object, undirected, with 588 vertices and 2863 edges at 0x113a4ad90>

In [56]:
author_vertex_dict["serenafondaumani"]


Out[56]:
490

In [60]:
author_full_name_dict["serenafondaumani"]


Out[60]:
(u'Serena', u'Fonda Umani')

In [ ]:
#%matplotlib inline
#gt.graph_draw(g, output_size=(1200,1200), output_format="png")

build full graph

import itertools g = gt.Graph(directed = False) author_vertex_dict = dict() author_full_name_dict = dict() c.execute('''SELECT pmid, fn, ln FROM authors''') authors = c.fetchall() authorspaper = [] currpaper = authors[0][0] for entry in authors: #accumulate by paper authorcat = unicode(entry[1]+entry[2]).replace(" ", "").lower() authorspaper.append(authorcat) author_full_name_dict[authorcat] = (entry[1],entry[2]) if entry[0] != currpaper: #add author nodes and edges for comb in itertools.combinations(authorspaper, 2): addedge(g, comb[0], comb[1], author_vertex_dict) #reset and begin accumulating again currpaper = entry[0] authorspaper = []

In [12]:
#THIS CALCULATION IS SLOW!!!! Like 5 hours slow. SVE THE DICTS AND THE GRAPH WHEN DONE
import cPickle as pickle pickle.dump(g, open("authors_full_graph.p", "wb")) pickle.dump(author_vertex_dict, open("authors_vertex_dict.p", "wb")) pickle.dump(author_full_name_dict, open("authors_full_name_dict.p", "wb"))

In [1]:
import cPickle as pickle
g = pickle.load(open("authors_full_graph.p", "rb"))
author_vertex_dict = pickle.load(open("authors_vertex_dict.p", "rb"))
author_full_name_dict = pickle.load(open("authors_full_name_dict.p", "rb"))

in future may want to give edges weights, where appearing on one paper together = 1, 2 = 2, and so on.


In [2]:
def addedge(graphobject, source, dest, vertexdict):
    if source not in vertexdict:
        v = graphobject.add_vertex()
        vertexdict[source] = int(v)
    if dest not in vertexdict:
        v = graphobject.add_vertex()
        vertexdict[dest] = int(v)
    graphobject.add_edge(vertexdict[source], vertexdict[dest])
    return graphobject, vertexdict

import Queue
import graph_tool.all as gt
def buildlocalgraph(rootnode, mastergraph, indepth = 0, outdepth = 2):
    _g =gt.Graph()
    _vertexdict = dict()
    q = Queue.Queue()
    q.put((rootnode, 0))
    #first go in out direction
    while not q.empty():
        node = q.get()
        if node[1] <= outdepth:
            try:
                for neigh in _g.vertex(node[0]).out_neighbors():
                    _g, _vertexdict = addedge(_g, node[0], neigh, _vertexdict)
            except KeyError:
                "{} degree node {} not in graph g".format(node[1], node[0])
                
def buildlocalgraphundirected(rootnode, mastergraph, indepth = 0, outdepth = 2):
    _g =gt.Graph(directed = False)
    _vertexdict = dict()
    q = Queue.Queue()
    #first go in out direction
    q.put((rootnode, 0))
    while not q.empty():
        node = q.get()
        if node[1] < outdepth:
            try:
                for neigh in mastergraph.vertex(node[0]).out_neighbours():
                    _g, _vertexdict = addedge(_g, node[0], neigh, _vertexdict)
                    q.put((neigh, node[1]+1))
            except KeyError:
                "{} degree node {} not in graph g".format(node[1], node[0])
    #now go in in direction
    q.put((rootnode, 0))
    while not q.empty():
        node = q.get()
        if node[1] < indepth:
            try:
                for neigh in mastergraph.vertex(node[0]).in_neighbours():
                    _g, _vertexdict = addedge(_g, neigh, node[0], _vertexdict)
                    q.put((neigh, node[1]+1))
            except KeyError:
                "{} degree node {} not in graph g".format(node[1], node[0])
    return _g, _vertexdict

In [33]:
#plots
#%matplotlib inline
#import matplotlib.pyplot as plt
#import numpy as np

In [3]:
#author = u'martinjshipley'
author = u'emmanuellebouzigon'
minigraph3, vertexdict3 = buildlocalgraphundirected(author_vertex_dict[author], g, 1, 1)
deg = minigraph3.degree_property_map("out") #out AND in how?
gt.graph_draw(minigraph3, vertex_fill_color=deg, output_size=(1200,1200), inline=True)


Out[3]:
<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x192adab90, at 0x1713e1e90>

In [16]:
author_vertex_dict


Out[16]:
{u'danutako\u0142o\u017cyn-krajewska': 245480,
 u'wen-tingchu': 2221970,
 u'deepakprakashmahara': 485554,
 u'sarahmweakley': 1222212,
 u'kristinap.vatcheva': 1576754,
 u'jeanpascalherault': 970387,
 u'tuulavaskilampi': 460076,
 u'charlesd.warden': 1721371,
 u'alirezaabdulahzadebaghaee': 1498307,
 u'ashishpathak': 196499,
 u'chih-jaantai': 250113,
 u'stephaniedifrancesco': 313533,
 u'kennethfmanly': 1193570,
 u'carlenayuen': 1022453,
 u'georgezafiriou': 514825,
 u'dennisc.sgroi': 632897,
 u'cristinawisnivesky-colli': 2357595,
 u'kaiyuezhang': 551389,
 u'josephe.leclerc': 2046040,
 u'm.warner': 237154,
 u'carolinavillagra': 2181410,
 u'nancybaker': 1870875,
 u'tomoshigematsumoto': 509066,
 u'doughinerfeld': 1844157,
 u'candanzehraduvan': 1501797,
 u'esinsoy': 1509351,
 u'jonseo': 1666058,
 u'wilfriedmullens': 212634,
 u'evaperez-jimenez': 1689799,
 u'magdalenawadowska': 1613876,
 u'augustosimoes-barbosa': 1704858,
 u'n.s.bisht': 298384,
 u'razanwafai': 579042,
 u'douglasa.marchuk': 814811,
 u'robertacesa': 1840678,
 u'm.ibrahimtuglu': 2345977,
 u'robertt.downs': 1645971,
 u'fongchunchan': 1434549,
 u'm.b.smeets': 1639187,
 u'clareperkins': 350714,
 u'karlskretting': 453866,
 u'keithe.szulwach': 1689812,
 u'giorgisisaac': 53082,
 u'xingyongzhang': 721452,
 u'davidcourtin': 509118,
 u'byamahbrianmutamba': 199489,
 u'javierserrania': 952061,
 u'juanantoniomartinez': 342667,
 u'davide.ferrier': 1095554,
 u'zhihuitong': 319521,
 u'claudiaallen': 1056638,
 u'philipa.stein': 1814428,
 u'davidp.richman': 1446970,
 u'felipecuartero': 2090950,
 u'jean-paulcadoret': 12041,
 u'tamiriszanforlinolmosfernandes': 1562557,
 u'negusuworku': 493410,
 u'manuelsoto': 112177,
 u'hallahelgad\xf3ttir': 337919,
 u'shaktia.goel': 2052340,
 u'rosscranston': 2225621,
 u'm.arantxacolchero': 78952,
 u'lazarousmbulo': 2370672,
 u'lucindam.hilliard': 1807735,
 u'danieldrozdov': 288612,
 u'natashaluquin': 139875,
 u'keisukeshinozuka': 2245739,
 u'yunhuilu': 1797771,
 u'g\xfcnterjost': 1390213,
 u'dalanewkitzman': 548171,
 u'lindas.williams': 978477,
 u'yousunkim': 320685,
 u'i.wong': 305602,
 u'philippelou\xe2pre': 2055472,
 u'olgaigglessi-markopoulou': 516858,
 u'istvanjankovics': 132401,
 u'wilkof.a.r.verbakel': 2017344,
 u'joophermens': 1290060,
 u'wasifnkhan': 223413,
 u'anneloese.ruifrok': 1240465,
 u'manala.rahman': 1797177,
 u'aprila.herbert': 1801953,
 u'mamdouhabdulghafour': 1690704,
 u'leonardoramos': 1529577,
 u'alpaydinalp': 723239,
 u'vrosti': 1274833,
 u'katjae.wartenberg': 2019325,
 u'camilacespedes': 1634660,
 u'josl.m.l.lenoble': 1297140,
 u'shin-ichinakanuma': 768186,
 u'rogeriofaustinoribeiro': 1675930,
 u'k.zeppenfeld': 1239038,
 u'annabonini': 1037034,
 u'fantasiby': 1541617,
 u'amiramili': 1729436,
 u'yoshifumibeck': 1210150,
 u'kaih\xf6ffner': 147078,
 u'ricardoluizdantasmachado': 1855635,
 u'christopherlkepley': 1608258,
 u'annaotlewska': 248184,
 u'marijam.petrinovic': 1902337,
 u'm.rama': 831736,
 u'sineadmaguire': 2044697,
 u'konstantinospanagiotellis': 989646,
 u'randalpbabiuk': 1796418,
 u'ziadelkhatib': 2068868,
 u'li-yuanqiu': 1361073,
 u'sawsanfeki': 2363901,
 u'shunghanchoi': 1083249,
 u'm.ramu': 1952012,
 u'kellyormond': 1007326,
 u'hodakhoshbakht': 1400568,
 u'haraldguendel': 121824,
 u'robertobini': 1759929,
 u'michellel.hamilton': 1563657,
 u'marcelop.barros': 1442721,
 u'sharonoswald': 335960,
 u'sokratispastromas': 1352130,
 u'ilariadelprincipe': 1210422,
 u'ninahoque': 2138588,
 u'\xe9ricvignon': 533362,
 u'lauramacdougall': 691196,
 u'niluferersan': 1755636,
 u'ellenabegole': 1505670,
 u'seongshoonyoon': 491817,
 u'sheydashaafi': 2005889,
 u'silviafragoeiro': 1581093,
 u'albertoboffi': 474973,
 u'jackm.loomis': 2230558,
 u'kathleendegreef': 2051332,
 u'simoncarney': 372941,
 u'mstallman': 137904,
 u'birgittewalgreen': 534538,
 u'marcobaruffi': 433608,
 u'rsch\xf6nenberger': 339707,
 u'tom\xe1\u0161frant\xedk': 331977,
 u'xiao-rongzeng': 427151,
 u'jaapkeijer': 21304,
 u'ronc.mease': 1995396,
 u'stephaniec.j.keating': 2214231,
 u'mohameda.karmali': 1836328,
 u'juanacedano': 1534843,
 u'decaijin': 299410,
 u'terrieekitchner': 1898329,
 u'albandrialfraidi': 1990841,
 u'karinsievwright': 2297331,
 u'gauravgandhi': 2177634,
 u'qinzhilee': 2051424,
 u'ruiliang': 349196,
 u'sukwanhandali': 1632623,
 u'elifi.ekinci': 658550,
 u'j\xe9r\xf4meurbain': 750661,
 u'ayanamoore': 1897751,
 u'sheng-fuyou': 1891854,
 u'asafbachrach': 755405,
 u'alit.al-hinai': 1868629,
 u'rosariopoy': 755742,
 u'zhengangtang': 701777,
 u'juliaadler': 2064950,
 u'hsin-hsiunghuang': 50353,
 u'delelegnyilmagebremichael': 687841,
 u'lisae.kilpatrick': 1529799,
 u'krittantanarat': 1559993,
 u'marya.asirifi': 1555232,
 u'emanabdelzaher': 286187,
 u'diegoarcelli': 98188,
 u'alihamadanchi': 176028,
 u'woonyongkwon': 1384900,
 u'sung-pyolee': 1247418,
 u'dimitragkika': 588587,
 u'panagiotiszis': 738605,
 u'wekaufmann': 820620,
 u'alanlyons': 194073,
 u'valenzhuoyouyu': 262613,
 u'jonathanstrong': 1078311,
 u'maral.mberira': 150140,
 u'nadezdanbabenko': 2029526,
 u'abdullaha.al-mishari': 1050521,
 u'andresteinmetz': 1594188,
 u'kirstenrichter': 1874710,
 u'jordandevaan': 1966366,
 u'kiranshekar': 265297,
 u'ericy.zhang': 2173337,
 u'juanchen': 74195,
 u'joopvandenhoek': 2155659,
 u'araevuori': 1007144,
 u'jessicarosati': 84196,
 u'camerongreen': 2269128,
 u'beatrizmorancho': 112998,
 u'brunobetschart': 2281880,
 u'hideyukiando': 1444140,
 u'ssahay': 137315,
 u'alisteru.nicol': 903560,
 u'williamputzbach': 2322243,
 u'r.peravali': 769170,
 u'michelfranco': 2339250,
 u'danieltcarr': 1130132,
 u'irenemiguel-escalada': 1525778,
 u'muyebseo': 1214868,
 u's\xfchah.akp\u0131nar': 1969246,
 u'hongyangtang': 1286840,
 u'sabujacob': 181386,
 u'wenguangyin': 726551,
 u'michaelshill': 217270,
 u'aliazhardawasaz': 771020,
 u'rajeshkumarbag': 1515237,
 u'ivannikiforovminkov': 1715089,
 u's.m.sim': 717632,
 u'emmanuelledeniaud': 2165562,
 u'geruzaalvesdasilva': 174552,
 u'anjavoigt': 140154,
 u'wanyongho': 252214,
 u'andreatironi': 984846,
 u'stijnj.a.aper': 2219931,
 u'pradnyap.kanekar': 2043390,
 u'jwaldmann': 416465,
 u'adamkolondra': 9594,
 u'shailvyas': 1212821,
 u'g.j.frey': 1968649,
 u'pedrorifakis': 1472712,
 u'davidm.whiley': 1961264,
 u'karimattim\xe4kel\xe4': 1417042,
 u'louisemarsh': 352384,
 u'deborahmcfarland': 1853672,
 u'nian-songwang': 911943,
 u'adri\xe1nsantana-ramirez': 1241443,
 u'georgiosodimitracopoulos': 99519,
 u'prabahviwaraketiya': 1029521,
 u'kristenl.votruba': 1094797,
 u'katrinalwatson': 573679,
 u'joycedestoppelaar': 773795,
 u'jun-yakato': 90600,
 u'hansj.dehaas': 1932975,
 u'akimasahirata': 452384,
 u'leoniechristian': 1815480,
 u'brijendrak.tiwari': 2087458,
 u'robertp.erickson': 1874810,
 u'lejlamutapcicvajzovic': 1023545,
 u'juanj.caballero-novella': 2290999,
 u'oloftorper': 810771,
 u'chang-jinchoi': 1630913,
 u'b.selvan': 1733262,
 u'sadafkhan': 331512,
 u'k.a.jones': 1899177,
 u'julianepps': 1402904,
 u'asadimian': 359857,
 u'arlend.hanssen': 1333541,
 u'yuhanisaahmad': 1232401,
 u'emmanuelonwubikonwankwo': 2361681,
 u'agnieszkademczuk': 2339428,
 u'timothyj.garrett': 1013742,
 u'adams.deardorff': 933892,
 u'h.vicente': 2347050,
 u'pu-hongzhang': 657861,
 u'ant\xf3niopedrocachorodrigues': 1166642,
 u'cnmanjunath': 1351378,
 u'asirvathama.robert': 729069,
 u'yasuhirotsutani': 885312,
 u'na\xedlac.donascimento': 947888,
 u'anushasinha': 956849,
 u'hidekinarimatsu': 227632,
 u'yasuyukimizuno': 535874,
 u'maragladstone': 1042536,
 u'claudiae.vieirawiezel': 2109350,
 u'guanyangzou': 368182,
 u'loredanacolceriu': 961473,
 u'majlindalako': 284749,
 u'gajendrasingh': 2006587,
 u'p.w.davenport': 2258682,
 u'farahtasnim': 819248,
 u'j.m.reynard': 1529336,
 u'danielachieffo': 2147550,
 u'randylscherer': 1229786,
 u'dingenalvalstar': 1371709,
 u'galenshi': 1728265,
 u'claudiogaz': 2161401,
 u'jos\xe9augustobarreto-filho': 525446,
 u'annekejvanderkooi': 956920,
 u'pawe\u0142rog\xf3\u017c': 1602834,
 u'hamedamini': 563889,
 u'patr\xedciacsantos': 15116,
 u'stanblock': 2210635,
 u'arvasavada': 1645996,
 u'hestiaimperiano': 2015725,
 u'lindam.wakim': 2140142,
 u'zyanyaluc\xedazatarain': 2101659,
 u'guoyuduan': 180110,
 u'defenghu': 1819367,
 u'micheleserri': 1415993,
 u'daviddavis': 1550374,
 u'jamiesui-lamkwok': 42668,
 u'jonathanm.mcgavock': 979518,
 u'vikramkishorenavil': 1004121,
 u'michaelg.rosenberg': 451995,
 u'igorkfomin': 2023944,
 u'christellevasnier': 945828,
 u'paolakoenen': 1209310,
 u'lakshmivasudevan': 1445853,
 u'robertburkes': 742219,
 u'g.scottgazelle': 1470532,
 u'marillypalettas': 899384,
 u'nathana.hotaling': 836059,
 u'jasmingiller': 2019121,
 u'malba': 938613,
 u'bernardpeers': 59842,
 u'queendube-mbeye': 1835166,
 u'jameswalugembe': 693433,
 u'dawnolson': 604071,
 u'ke-xiongwang': 1811190,
 u'sakurakouozu': 69350,
 u'rebeccaslevin': 1067984,
 u'annethusharamatthias': 200204,
 u'wilhelmsch\xf6nhuber': 106670,
 u'sbaran': 1010082,
 u'annydewilde': 504254,
 u'dheerajkalladka': 2331016,
 u'perflisberg': 369866,
 u'alexandralubina-solomon': 977035,
 u'anna-sarakr\xe5ng': 1963547,
 u'masafumiumekage': 1314042,
 u'g.yosipovitch': 755252,
 u'javiera.men\xe9ndez': 1640760,
 u'j.w.lee': 606015,
 u'quangm.tieng': 883754,
 u'przemys\u0142awkarpowicz': 2206732,
 u'hmdavey': 414400,
 u'christiangeraut': 186147,
 u'dannym.tam': 2269751,
 u'sabihaabekhoukh': 1151667,
 u'sylvainferrant': 1957363,
 u'ornellamassa': 975024,
 u'wernerhosemann': 280769,
 u'xuminwang': 39551,
 u'mihakrofel': 2061745,
 u'chang-zhicai': 2067136,
 u'judsonaward': 22685,
 u'chen-yenchien': 2184211,
 u'emmanuellebouzigon': 945378,
 u'claramartin': 2261319,
 u'rosanadecarvalhocruz': 1852806,
 u'yongyeonjeong': 984697,
 u'jackiebye': 802646,
 u'rnazarian': 1727512,
 u'y.acremann': 2327200,
 u'l.fernandes': 1188373,
 u'briank.walker': 2127823,
 u'davidc.riccio': 903492,
 u'josephfschad': 1814678,
 u'anttisarela': 178712,
 u'kamelbouraoui': 2366401,
 u'marcaattiyeh': 27462,
 u'octaviocarvajal-zarrabal': 255677,
 u'v.glenntarcea': 1718343,
 u'tomonobukanasugi': 1240268,
 u'mcemcfadyen': 386969,
 u'patrickkarcher': 1533623,
 u'yinclin': 1691689,
 u'jamesb.uney': 71358,
 u'lancea.stechschulte': 2037398,
 u'ghimsiongow': 7545,
 u'aimonk.alkanani': 853430,
 u'g\xfcntherschweizer': 2139522,
 u'eduardofern\xe1ndez-cruz': 2109181,
 u'thomash.darrah': 684815,
 u'markcoates': 2373588,
 u'jing-taosun': 230128,
 u'benjaminb.green': 40805,
 u'roberthawes': 1186677,
 u'feijiaxu': 2230957,
 u'josephe.perales': 1288417,
 u'de-changli': 1004973,
 u'jayabalanm.joseph': 1844557,
 u'hichamlahlou': 634135,
 u'josipbegovac': 692870,
 u'takeshiohta': 199106,
 u'akirasassa': 1697511,
 u'j.t.paz': 1871210,
 u'kalliopimarinou': 2055379,
 u'chih-zenchang': 256532,
 u'kathys.evans': 787942,
 u'mariajos\xe8sisalli': 882817,
 u'aprilwatanabe': 772865,
 u'francescotaus': 2197627,
 u'brittee.lowther': 1443553,
 u'd.l.robinson': 209773,
 u's\xf3nnicagal\xe1n-gil': 1748856,
 u'alfredoraglio': 700895,
 u'a.dziurda': 829803,
 u'toshiyamanabe': 1424125,
 u'carolineschluth-bolard': 1803891,
 u'susanfischer': 1006520,
 u'manuelacalder\xf3n': 2060465,
 u'bakhtiyorrasulev': 1908169,
 u'myong-jinkang': 1446650,
 u'albertogianinetti': 40287,
 u'christodoulosistefanadis': 549119,
 u'maciejurbaniak': 1513840,
 u'shekharmallick': 1016084,
 u'jen-hotseng': 247726,
 u'jeffreydage': 2214300,
 u'alexanderi.f.simpson': 1306310,
 u'haruonishijima': 2381143,
 u'williama.lorenz': 1714150,
 u'ga\xebllediserens': 2254157,
 u'shinagawayoko': 856385,
 u'svenmagnuscarlsen': 163392,
 u'michaelgsboylan': 1002505,
 u'michelejmaiers': 372456,
 u'farslan': 416049,
 u'nikolaos\xa0p.mastroyiannopoulos': 562333,
 u'harshdhar': 1731975,
 u'jemalmhamid': 662624,
 u'fatmasarac': 514763,
 u'deborahhung': 1192574,
 u'chelseay.xu': 2171822,
 u'giorgiomalpeli': 587874,
 u'christoskrogias': 159120,
 u'jitendramangwani': 1003383,
 u'dyonimatiasdeoliveira': 2286145,
 u'toshironagasawa': 162337,
 u'byoungheonkang': 588713,
 u'ievgeniiaa.tiukova': 2189170,
 u'brianrbarrows': 728115,
 u'rossanavermiglio': 1758401,
 u'b\xe9lapapp': 156629,
 u'changgookang': 1928204,
 u'j.decolongon': 750934,
 u'sabinehummert': 895785,
 u'a.a.khajetoorians': 1413430,
 u'cliffs.han': 947849,
 u'anne-meretesoja': 2053574,
 u'mirrej.p.simons': 1123735,
 u'somagupta': 498406,
 u'ewasiwak': 224299,
 u'iliastsiflikas': 646624,
 u'kenichitakeshita': 161659,
 u'wouteradreschler': 372194,
 u'claudiozuniga': 2296775,
 u'toniarussell': 531353,
 u'jiankuang': 1449000,
 u'apostolossarivalasis': 501296,
 u'laurelslater': 1790984,
 u'liang-shunyou': 429594,
 u'samueljarbes': 205605,
 u'magalibouhours': 1747012,
 u'leermobley': 760243,
 u'berislavlisnic': 1885702,
 u'jassirwitta': 320161,
 u'azlarabmasrar': 2364228,
 u'mavisli': 467999,
 u'gilesn.johnson': 68402,
 u'lawrencef.pupulim': 767782,
 u'kennethd.mckenzie': 1752857,
 u'juliaanderson': 1355417,
 u'christophera.maloney': 839420,
 u'paulinam.dominiak': 1286430,
 u'hosseinkhalili': 201353,
 u'amandaldauphinee': 678915,
 u'guangpingwang': 2274818,
 u'arnoldg.e.leenders': 1979038,
 u'torbj\xf6rnnor\xe9n': 1238404,
 u'abdullahal-shimemeri': 467234,
 u'soshiterasaka': 2004530,
 u'm.m.brentani': 2276346,
 u'albertjbecker': 775946,
 u'evanthiadiamanti-kandarakis': 360663,
 u'laurenluongo': 2259425,
 u'patricka.murphy': 2225905,
 u'anne-mettehaase': 1133661,
 u'sigridhoyer-fender': 664756,
 u'nicolaasg.jaspers': 1969118,
 u'rajnitrivedi': 1537759,
 u'annae.barmintseva': 1795957,
 u'arimandansreddy': 610284,
 u'cl\xedmacocano': 1636944,
 u'angeldelgado': 1791249,
 u'davidw.cuthbertson': 2186792,
 u'douglaswhite': 342798,
 u'craig\nd.smith': 1695041,
 u'matijazupan': 652320,
 u'evawalla': 1803806,
 u'hiroshiokubo': 210360,
 u'miroslawlech': 1944758,
 u'grzegorzkreiner': 1152161,
 u'chimingjin': 1425103,
 u'ryokokishi': 1753448,
 u'jerelm.ezell': 1240296,
 u'henkschallig': 1296634,
 u'randallgriffith': 1414056,
 u'kennethfearon': 1467318,
 u'yisiangng': 1701731,
 u'nathaliemandjee': 221292,
 u'e.benizeau': 2305513,
 u'paulmeredith': 1411139,
 u'mustafabasbozkurt': 1003492,
 u'monikarajkowska': 838324,
 u'douglas\nm.jacobsen': 1249078,
 u'francishygreen': 2011622,
 u'frederickdavidrichardhobbs': 511070,
 u'syedsultanbeevi': 1214527,
 u'tiagosantos': 955558,
 u'sergiizmalynych': 1512761,
 u'brianfoxwell': 118744,
 u'ginov.limmon': 2093433,
 u'geraldinekong': 1362247,
 u'jantachezy': 31171,
 u'luissantom\xe9-collazo': 303004,
 u'anitasun': 1583358,
 u'alessandracarbone': 43082,
 u'nurias\xe1nchez': 2081813,
 u'naomischotte': 327008,
 u'yanivy.munwes': 2227770,
 u'hjpurohit': 440110,
 u'\u8ce2\u53f2\u5927\u6797': 1648849,
 u'kathrynm.szczotka': 1181590,
 u'leandros.oliveira': 263236,
 u'johangorgasbrun': 531484,
 u'shintok.john': 2057463,
 u'alyssaplatt': 366603,
 u'edatan': 791640,
 u'hisatakanuma': 1018271,
 u'jaykumarrangani': 1017203,
 u'adiljankader': 1171995,
 u'katsuyukimaki': 1051242,
 u'janeliebelt': 548388,
 u'johannesechterhoff': 2322697,
 u'katalinak\xe9kesi': 81978,
 u'c\xe9liapais': 102508,
 u'johnm.tokish': 1827493,
 u'k.lakshmi': 142155,
 u'sandraweih': 2305687,
 u'japmengjee': 2135286,
 u'danielwfults': 77042,
 u'christinascherer': 367351,
 u'juliebarnett': 166278,
 u'sindidiko': 537232,
 u'xiu-chengjiao': 2198864,
 u'lynellezahayko': 420157,
 u'shefkixharra': 1759821,
 u'leahn.grandi': 2281304,
 u'soniahernandez': 2064783,
 u'mariadsifaki': 1063330,
 u'samuelk.kutty': 1932890,
 u'lucasliepert': 1583790,
 u'vinitbmahajan': 190542,
 u'frodejohannessen': 1965960,
 u'dirkrvanbockstaele': 1223793,
 u'sabrinar.kendrick': 2112199,
 u'junichirojameskazama': 969857,
 u'joongsubchoi': 1235305,
 u'hyoung-chuljo': 1600950,
 u'federicodalbello': 505048,
 u'masaminanzyo': 267623,
 u'leonidlmoroz': 73878,
 u'nataliestratton': 2281147,
 u'marialourdeseamarillo': 2354451,
 u'jamesh.campbell': 1329175,
 u'christopha.haselwandter': 1871003,
 u'alirezapourebrahimi': 1400430,
 u'qian-gezhu': 2289595,
 u'adamg.tabak': 849961,
 u'simonegrisan': 1020622,
 u'carolinelwatkins': 377916,
 u'robertc.upstill-goddard': 2253352,
 u'miekeh.f.grypdonck': 184694,
 u'juwariamulla': 684281,
 u'mariamcampos': 1614725,
 u'martinposp\xed\u0161ek': 1697439,
 u'namalperera': 2356954,
 u'mayacesari': 1385295,
 u'alabia.okunola': 1085079,
 u'michinorimatsumoto': 191099,
 u'adinayheilbrunn-lang': 541731,
 u'yurihibino': 1204152,
 u'jenniferburns': 468543,
 u'mariacristinalanciacury': 1758320,
 u'tetsuokatsura': 1753175,
 u'analuciamauri': 2284933,
 u'hsin-iliao': 1048069,
 u'heikelux': 2076266,
 u'vincenzomigliaccio': 843738,
 u'faridahhanimshakirin': 716597,
 u'vladim\xedrve\u010derek': 2351534,
 u'l.m.vaanholt': 1905352,
 u'thomasw.kallert': 2269176,
 u'chloemorris': 1149850,
 u'elizabethfrancesbowen': 984750,
 u's.m.fullerton': 1967637,
 u'jacobkarsh': 303502,
 u'nancyyanzhu': 919330,
 u'sheilam.cummingsmacri': 1881545,
 u'ellendotson': 2184547,
 u'thomasroskoden': 1798306,
 u'tsuyoshiharata': 210062,
 u'ronaldoa.p.nagen': 1853179,
 u'pujayadav': 1707999,
 u'ju-huawang': 1563075,
 u'theresekardakis': 377043,
 u'bettym.drees': 1795150,
 u'lindaa.guernsey': 716674,
 u'celiamariadealmeidasoares': 43641,
 u'dong-meiwang': 291037,
 u'guohuiwang': 594643,
 u'katsutoshimiura': 928008,
 u'hakanyi\u011fitba\u015f': 515071,
 u'yutakaokita': 55405,
 u'xudongjiang': 1820265,
 u'zaiwanghuang': 1904859,
 u'ageorgiades': 821580,
 u'hyoung-wonson': 1517497,
 u'abdulakatakweba': 2354715,
 u'colleenm.niswender': 449485,
 u'deepaknamarapurkar': 319108,
 u'michaelj.seitz': 2126348,
 u'mathieujeanmaire': 740915,
 u'rpitakaka': 351088,
 u'julieenticknap': 952829,
 u'elizabethkutter': 1203316,
 u'paul-gerhardschlegel': 1528668,
 u'yiklimkok': 1953235,
 u'laurad.lewis': 64064,
 u'carmeno\xf1ate': 2173628,
 u'francescoiemolo': 175226,
 u'rebekahl.horn': 2170353,
 u'christophwelsch': 1194455,
 u'shijojoseph': 1138600,
 u'se-chuljeong': 1531578,
 u'tamakimabuchi': 1399924,
 u'carlamoran': 1391634,
 u'guillermoreygozalo': 1295456,
 u'arpadbarath': 1737555,
 u'hsing-tingyu': 1001274,
 u'rowang.walker': 863623,
 u'takashinakahari': 1394945,
 u'chrisdenning': 48614,
 u'amayagorostiza': 2221804,
 u'martinaeschmidt': 588392,
 u'yuanm.zhou': 1880754,
 u'takashinakahara': 1393659,
 u'anaf.abraido-lanza': 872798,
 u'doanvannguyen': 116943,
 u'sawadboonpiyathad': 932261,
 u'h\xe9l\xe8nedumesnil': 2188115,
 u'asuferg\xfcny\u0131lmaz': 1784385,
 u'vijayaphanikumaryemparala': 1898035,
 u'irmgardwech': 57527,
 u's.c.chow': 627947,
 u'andrzejszulc': 1210684,
 u'habacucflores-moreno': 1123215,
 u'janir.jensen': 461221,
 u'alfredotorreslarios': 2083090,
 u'iang.barr': 867488,
 u'andr\xe9scastell-rodr\xedguez': 2037913,
 u'abdelmajidbelouchi': 2091369,
 u'audreyruple-czerniak': 519613,
 u'rachelcarmenta': 2132388,
 u'khaledmohamedyounes': 1142327,
 u"sidneyd'mello": 2192737,
 u'minetdewied': 1480200,
 u'phillipa.mcghee': 2044494,
 u'evaterzibasi': 1841738,
 u'fengyuluo': 581062,
 u'elizabethsfrey': 783581,
 u'chanele.smart': 633598,
 u'markd.faries': 1812941,
 u'arnaudbourd\xe9': 996488,
 u'louc.grothaus': 978517,
 u'catherinendungu-case': 2174785,
 u'sandracook': 2380441,
 u'damelankombate': 2081735,
 u'mariagraziasavino': 1869360,
 u'shaider': 398381,
 u'su-chilim': 1432177,
 u'davidr.plas': 789389,
 u'mar\xedag.parra': 2138791,
 u'jongdaebae': 1601032,
 u'panagiotatsitoura': 1831188,
 u'maryfairfield': 684195,
 u'joohwancha': 1557063,
 u'piotrdworzynski': 1417121,
 u'zongchuanlong': 2284215,
 u'ugoantonellogironicarnevale': 280721,
 u'barbaramaccagno': 1616831,
 u'elioraron': 8736,
 u'pjhogg': 792908,
 u'aparberry': 394670,
 u'lanceobauer': 40274,
 u'elizabethdefrancescodaher': 507735,
 u'dorotheadjenkins': 1754471,
 u'aerinyoon': 2228885,
 u'tomp.beresford': 891334,
 u'antonk.pallua': 641461,
 u'ivanomenicucci': 2317798,
 u'annalkhandoga': 1380346,
 u'clifforde.soll': 1251025,
 u'ma\u0142gorzatagraczyk': 1823477,
 u'yu-linhsu': 1364700,
 u'stephenmtollman': 619851,
 u'omart\xednez-maza': 410226,
 u'lukaszjodko': 937453,
 u'pierredewit': 1202833,
 u'juanmuinelo-lorenzo': 1749140,
 u'chiao-fanlin': 1341108,
 u'mkusama': 821593,
 u'peterwatt': 1808232,
 u'kaid.zacharowski': 2139982,
 u'renzhiwang': 455043,
 u'davidanorris': 95135,
 u'robertfbulleit': 783786,
 u'ghulamnabilone': 1206798,
 u'canadykeniscope': 2080479,
 u'marialu\xedsafigueira': 465981,
 u'elizabethj.cook': 2235765,
 u'yukikomatsu': 1163807,
 u'davida.zvara': 1021497,
 u'linam.vargas': 2216270,
 u'kamarularyffinbaharuddin': 1561019,
 u'ronaldplishka': 1884692,
 u'annpont\xe9n': 155513,
 u'flaminiacesaremarincola': 250476,
 u'tforntoft': 390568,
 u'bhekiebrilliancemamba': 1290372,
 u'lilliancollins': 544569,
 u'mostafamoazzami': 1691135,
 u'nolusindisoncitakalo': 1977170,
 u'shu-meilee': 1453193,
 u'erikbrandsborg': 499560,
 u'efthymiavlachopoulou': 1651601,
 u'abigailb.radcliff': 2284456,
 u'gustavoribeirofernandes': 105186,
 u'c.lawless': 1584378,
 u'rubynatale': 674075,
 u'natashacrowcroft': 525474,
 u'vanessadidelez': 456907,
 u'winonacbarker': 443062,
 u'ganizanimlawanda': 562762,
 u'laylamich\xe1n': 303071,
 u'tamazightcherifi': 2290716,
 u'paulcantalupo': 1688811,
 u'fangboxia': 1441995,
 u'christophechassard': 99224,
 u'shidancheng': 1481167,
 u'kyasuda': 397344,
 u'linjunhong': 36803,
 u'r.schwan': 1072624,
 u'mar\xedaj.ortiz': 1250838,
 u'nayumishigihara': 1661992,
 u'ralphacasale': 1712873,
 u'analuciagarippo': 966190,
 u'christinal.graves': 1264442,
 u'mahanteshbnagmoti': 180252,
 u'elizabethnoznesky': 906672,
 u'brunoeymard': 75554,
 u'ralphbeneke': 1137858,
 u'bobbypckoeleman': 221581,
 u"geraldinem.o'neill": 1324014,
 u'timmychiwingchan': 1339678,
 u'sojashamizadeh': 511407,
 u'nambler': 338317,
 u'wan-yili': 91281,
 u'shamsamirdat': 2360787,
 u'catherinea.pastorius': 976094,
 u'j\xf6rndunkel': 864477,
 u's\xf6renturan': 1710038,
 u'renaudf.warin': 2121702,
 u'hyun-mison': 1562725,
 u'monikarudzinska': 758101,
 u'hankeheun-johnson': 1786900,
 u'michelesturgeon': 953023,
 u'j.sadon': 1136290,
 u'a.sachinidis': 160491,
 u'robertmdavid': 1618153,
 u'mariajesusabanal-silao': 1741023,
 u'ipetersen': 392182,
 u'yacoubkhalaf': 1971170,
 u'peggyroberts': 1414062,
 u'yoshieyahagi': 2329340,
 u'annesturcke': 1698989,
 u'annkristinkvam': 353448,
 u'pablomorales': 550522,
 u'amaasantewatamatey': 186952,
 u'mayshawi': 347018,
 u'danielvonrhein': 1437237,
 u'mazhen': 2153484,
 u'tamiriszanforlin': 15115,
 u'teeghanm.e.rambo': 2249073,
 u'kuswanhadi': 1466528,
 u'minhphamvan': 2238195,
 u'sonjasabitzer': 2178603,
 u'taherehazizimotlagh': 722388,
 u'ponthananiyilkumaranbinumon': 2116723,
 u'juttaahnert': 1238945,
 u'bkwagner': 798623,
 u'jacquesflouquet': 1907575,
 u'utlwangbatlang': 1423903,
 u'santiagop\xe9rez-cachafeiro': 1782717,
 u'zhouzhiyi': 874615,
 u'carola.rohl': 238360,
 u'christineameke': 1856209,
 u's.fox': 1147792,
 u'tomaswfitzgerald': 8422,
 u'ybertrand': 390693,
 u'agafarzadehmotlag': 177378,
 u'giovannamilano': 1459188,
 u'm\xf3nicaguxens': 342518,
 u'saleshpchandran': 500097,
 u'harshadadevikatta': 2080578,
 u'stevenedgar': 1130903,
 u'beritbrusletto': 591943,
 u'keithr.kluender': 2115859,
 u'williamjholtz': 1746183,
 u'mohsenmaroufi': 1110225,
 u'hongxingniu': 1535473,
 u'rajashahidashraf': 218124,
 u'samatallah': 1622514,
 u'adame.green': 313126,
 u'christelmjdepooter': 573967,
 u'saral.sawyer': 1170852,
 u'ebtehalsal-abdullah': 1065160,
 u'arunmozhidominic': 303648,
 u'tobyhurd': 812469,
 u'wstolz': 1479472,
 u'heikkio.tikkanen': 842893,
 u'jeffreym.cumming': 2300694,
 u'gaila.shammas': 1502644,
 u'yongyaoxu': 2032120,
 u'normanj.johnson': 1230463,
 u'mohammadhosseinrazi': 1051607,
 u'whitneya.loring': 1334623,
 u'mgweinborn': 2001873,
 u'masakimurase': 1378936,
 u'gautamksahu': 1771843,
 u'st\xe9phanecook': 845456,
 u's.becker': 462132,
 u'magnusjerichardson': 1262114,
 u'lstechly': 411834,
 u'rpeto': 393506,
 u'stephanietiwari': 1866222,
 u'doheepark': 1686597,
 u'danielmraben': 56869,
 u'ichirotabuchi': 50984,
 u'catherinemshanahan': 1117514,
 u'mariahelenaneveslobosilva-filha': 2353125,
 u'miyakoshiraishi': 1716679,
 u'd.petre': 2342097,
 u'danielspeidel': 899492,
 u'simon\xa0j.buczacki': 1971879,
 u'marilyne.crisostomo': 1852539,
 u'marias.balda': 1431712,
 u'guozhixiao': 1066919,
 u'meryemhassouani': 1889420,
 u'j.s.l.brown': 2304920,
 u'javiervillanueva-meyer': 1414049,
 u'dominiqueheymann': 538789,
 u'arthurp.chan': 1099185,
 u'd.c.ompad': 1666959,
 u'binut.kuruvilla': 1485180,
 u'kojijimura': 754307,
 u'martijnm.stuiver': 377817,
 u'sallycriss': 2274048,
 u'danielb.hall': 1179543,
 u'shaoleiwang': 1912205,
 u'yurigori': 2113596,
 u'm.dibattista': 1112620,
 u'jemmelkamp': 1008987,
 u'josiecvankralingen': 1400246,
 u'ricardouauy': 150354,
 u'taketookubo': 2032353,
 u'priscilarossidebatista': 1397231,
 u'hillaryccleveland': 820223,
 u'dionigioprodi': 1838940,
 u'samuelemarro': 1546786,
 u'ijeomaonwagbo': 1774413,
 u'chaodongzhu': 42059,
 u'mahmouddanaee': 2176804,
 u'trentonrfoster': 1809787,
 u'paolacassolino': 779454,
 u'chengyunxu': 1992047,
 u'courtneycrawford': 373831,
 u'campbellchalmers': 653162,
 u'elinorjohn': 685894,
 u'douglasg.suntrup': 956536,
 u'romanvilas': 1459648,
 u'briankirsh': 1577893,
 u'stefanieott': 750834,
 u'carolineschuppli': 2223814,
 u'amarsrivastava': 1924781,
 u'enricogattavecchia': 1324833,
 u'celalettincamci': 764880,
 u'davidgrahamehardie': 87844,
 u'florencenaluyinda-kitabire': 1552439,
 u'chrismwilliam': 58993,
 u'petrjabandziev': 992101,
 u'alinevedder': 749689,
 u'carmencasta\xf1eda': 2110315,
 u'chayin': 2282119,
 u'katherinel.garrison-schilling': 2227049,
 u'giovanninassa': 39252,
 u'monical.assun\xe7\xe3o': 2295778,
 u'xiujieliu': 2043206,
 u'alcinosilva': 1726691,
 u'nanyhairunisa': 2126314,
 u'paulchihmingchihlau': 621602,
 u'rociolopez': 803105,
 u'ar\xe3onogueiradeara\xfajo': 1070887,
 u'valentinadispirito': 2331500,
 u'martinjshipley': 127209,
 u'marinelevittas': 812452,
 u'nathaliehasler-nguyen': 155090,
 u'tammiel.benzinger': 1334657,
 u'denisej.montell': 1688830,
 u'johnjsnyder': 421951,
 u'giovannipasseri': 260987,
 u'ehonkaniemi': 387212,
 u'gengjia': 2367293,
 u'markg.erlander': 632903,
 u'tizianagiuva': 1520164,
 u'hee\u2010jinkim': 222478,
 u'behnamdezfouli': 2349372,
 u'davidldeitcher': 57822,
 u'martinahorvathova': 1778903,
 u'atsukohirota': 1803513,
 u'jenniferjsmith': 145434,
 u'sanamverma': 319560,
 u'virginieprendki': 902159,
 u'\u502b\u535a\u5927\u85cf': 1649355,
 u'bryanhickey': 687329,
 u'jorgef.quarleri': 2067399,
 u'nathalieescaravage': 470534,
 u'stephaniea.kofsky-wofford': 1694861,
 u'juliem.huynh': 1634929,
 u'f.a.o.camargo': 297741,
 u'wenjuanli': 231142,
 u'josephbelagyi': 114897,
 u'yingfenqin': 2269600,
 u'bnsavani': 110933,
 u'jml\xf3pez-oliva': 384104,
 u'jmintz': 1727084,
 u'tadayoshitakeuchi': 1604064,
 u'samuelf.berkovic': 63096,
 u'm.alikhasi': 1532762,
 u'wenluo': 40335,
 u'dougjackson': 2103584,
 u'srujankumargandham': 1216035,
 u'donatellalanari': 345633,
 u'chiaramuggia': 1756582,
 u'jonathonmwillatt': 1666543,
 u'jacquelinelpadilla-gami\xf1o': 1669553,
 u'fankemeng': 1250188,
 u'rachidasoulaymani-bencheikh': 1521154,
 u'dipakkumarbhui': 1507586,
 u'christiandagossou': 1544477,
 u'w.jimzheng': 981728,
 u'alexanderyu.nikitin': 1157617,
 u'emmaj.dunn': 1484757,
 u'dhirajoshi': 812119,
 u'micha\xeblhemmer': 1424473,
 u'karenlaustin': 128357,
 u'xiao-suwang': 711113,
 u'konradzareba': 989603,
 u'bochaocheng': 742787,
 u'josephcolao': 819347,
 u'h.rugo': 617237,
 u'annaderosa': 882110,
 u'mariliagmartins': 2213325,
 u'ayaosaki': 1823824,
 u'lubicabenuskova': 857969,
 u'ionecarvalhopinto': 379308,
 u'hughgelabert': 1607645,
 u'michaeljtarry': 1631214,
 u'shinnwonlim': 2021797,
 u'firasal-niaimi': 922575,
 u'suyoungjeon': 512313,
 u'k.conway': 1240420,
 u'fransd.tichelaar': 1587858,
 u'michaels.green': 143835,
 u'frederickcdebeer': 320165,
 u'guidofschauer': 1891495,
 u'alexanderminovich': 1960299,
 u'petert.seden': 2119486,
 u'davidosterberg': 1185515,
 u'charlesjbanks': 559430,
 u'ranjithkumarkankala': 1450831,
 u'helmutzarbl': 1994610,
 u'mayukoishii-inagaki': 2181446,
 u'ki-junsong': 2021792,
 u'xiangrongchen': 937756,
 u'veyselkars': 420575,
 u'hisayukitono': 901737,
 u'v.ramirez-ramirez': 1778774,
 u'huizhechen': 1837674,
 u'zai-rongzhang': 1054788,
 u'ninaprokoph': 1720056,
 u'trevorstevenson': 558559,
 u'enzomanzato': 703219,
 u'qiu-jinyu': 1446984,
 u'ianshapiro': 2121011,
 u'sylvaindelzon': 470520,
 u'carolswallow': 595155,
 u't.nurmikko': 63826,
 ...}

In [17]:
author = u'martinjshipley'

build full collaboration graph WITH weighted edges for number of times collaborated

IN PROGRESS ON LAPTOP AS OF 11PM 6-27-16

CANCELED at 7am - but started on vagrant ipython notebook session


In [1]:
import graph_tool as gt

In [2]:
def addweightededge(graphobject, source, dest, vertexdict, weight):
    if source not in vertexdict:
        v = graphobject.add_vertex()
        vertexdict[source] = int(v)
    if dest not in vertexdict:
        v = graphobject.add_vertex()
        vertexdict[dest] = int(v)
    if graphobject.edge(vertexdict[source], vertexdict[dest]) == None:
        e = graphobject.add_edge(vertexdict[source], vertexdict[dest])
        weight[e] = 1
    else:
        weight[graphobject.edge(vertexdict[source], vertexdict[dest])] += 1
    return graphobject, vertexdict

In [1]:
import sqlite3
conn = sqlite3.connect('pmcv1-full.db')
c = conn.cursor()

In [ ]:
import itertools
g = gt.Graph(directed = False)
edge_weight = g.new_edge_property("int32_t")
author_vertex_dict = dict()
author_full_name_dict = dict()
c.execute('''SELECT pmid, fn, ln FROM authors''')
authors = c.fetchall()
authorspaper = []
currpaper = authors[0][0]
cnt = 0
for entry in authors:
    #accumulate by paper
    authorcat = unicode(entry[1]+entry[2]).replace(" ", "").lower()
    authorspaper.append(authorcat)
    author_full_name_dict[authorcat] = (entry[1],entry[2])
    if entry[0] != currpaper:
        #add author nodes and edges
        for comb in itertools.combinations(authorspaper, 2):
            addweightededge(g, comb[0], comb[1], author_vertex_dict, edge_weight)
        #reset and begin accumulating again
        currpaper = entry[0]
        authorspaper = []
    cnt += 1
    if cnt % 50000 == 0: print cnt #total 5,745,410 lines of authors in db
edge_weight[g.edges().next()]
maxw = 0 for edge in g.edges(): if edge_weight[edge] > maxw: maxw= edge_weight[edge] print maxw

In [ ]:
import cPickle as pickle
pickle.dump(g, open("authors_full_graph_with_weights.p", "wb"))
pickle.dump(author_vertex_dict, open("authors_vertex_dict_weights.p", "wb"))
pickle.dump(author_full_name_dict, open("authors_full_name_dict_weights.p", "wb"))
pickle.dump(edge_weight, open("authors_full_graph_weights.p", "wb"))

for not working without weights


In [1]:
import cPickle as pickle
g = pickle.load(open("authors_full_graph.p", "rb"))
author_vertex_dict = pickle.load(open("authors_vertex_dict.p", "rb"))
author_full_name_dict = pickle.load(open("authors_full_name_dict.p", "rb"))

In [30]:
rev_author_vertex_dict = {v: k for k, v in author_vertex_dict.items()}

In [2]:
# shortest path
# https://graph-tool.skewed.de/static/doc/topology.html?highlight=shortest%20path#graph_tool.topology.shortest_path

In [3]:
author_full_name_dict


Out[3]:
{u'danutako\u0142o\u017cyn-krajewska': (u'Danuta',
  u'Ko\u0142o\u017cyn-Krajewska'),
 u'gongbifu': (u'GONGBI', u'FU'),
 u'deepakprakashmahara': (u'Deepak Prakash', u'Mahara'),
 u'sarahmweakley': (u'Sarah M', u'Weakley'),
 u'kristinap.vatcheva': (u'Kristina P.', u'Vatcheva'),
 u'jeanpascalherault': (u'Jean Pascal', u'Herault'),
 u'tuulavaskilampi': (u'Tuula', u'Vaskilampi'),
 u'charlesd.warden': (u'Charles D.', u'Warden'),
 u'ashishpathak': (u'Ashish', u'Pathak'),
 u'chih-jaantai': (u'Chih-Jaan', u'Tai'),
 u'stephaniedifrancesco': (u'Stephanie', u'Difrancesco'),
 u'kennethfmanly': (u'Kenneth F', u'Manly'),
 u'carlenayuen': (u'Carlen A', u'Yuen'),
 u'georgezafiriou': (u'George', u'Zafiriou'),
 u'dennisc.sgroi': (u'Dennis C.', u'Sgroi'),
 u'maxjlchen': (u'Max JL', u'Chen'),
 u'cristinawisnivesky-colli': (u'Cristina', u'Wisnivesky-Colli'),
 u'kaiyuezhang': (u'Kaiyue', u'Zhang'),
 u'josephe.leclerc': (u'Joseph E.', u'LeClerc'),
 u'sokratispastromas': (u'Sokratis', u'Pastromas'),
 u'carolinavillagra': (u'Carolina', u'Villagra'),
 u'nancybaker': (u'Nancy', u'Baker'),
 u'tomoshigematsumoto': (u'Tomoshige', u'Matsumoto'),
 u'candanzehraduvan': (u'Candan Zehra', u'Duvan'),
 u'p.perrodo': (u'P.', u'Perrodo'),
 u'esinsoy': (u'Esin', u'Soy'),
 u'marcellooggianu': (u'Marcello', u'Oggianu'),
 u'wilfriedmullens': (u'Wilfried', u'Mullens'),
 u'hassanamohamed': (u'Hassan A', u'Mohamed'),
 u'n.s.bisht': (u'N. S.', u'Bisht'),
 u'razanwafai': (u'Razan', u'Wafai'),
 u'douglasa.marchuk': (u'Douglas A.', u'Marchuk'),
 u'robertacesa': (u'Roberta', u'Cesa'),
 u'robertt.downs': (u'Robert T.', u'Downs'),
 u'fongchunchan': (u'Fong', u'Chun Chan'),
 u'clareperkins': (u'Clare', u'Perkins'),
 u'oloftorper': (u'Olof', u'Torper'),
 u'keithe.szulwach': (u'Keith E.', u'Szulwach'),
 u'giorgisisaac': (u'Giorgis', u'Isaac'),
 u'xingyongzhang': (u'Xingyong', u'Zhang'),
 u'davidcourtin': (u'David', u'Courtin'),
 u'byamahbrianmutamba': (u'Byamah Brian', u'Mutamba'),
 u'javierserrania': (u'Javier', u'Serrania'),
 u'juanantoniomartinez': (u'Juan Antonio', u'Martinez'),
 u'davide.ferrier': (u'David E.', u'Ferrier'),
 u'zhihuitong': (u'Zhihui', u'Tong'),
 u'claudiaallen': (u'Claudia', u'Allen'),
 u'sandervanriet': (u'Sander', u'van Riet'),
 u'davidp.richman': (u'David P.', u'Richman'),
 u'jasmingiller': (u'Jasmin', u'Giller'),
 u'tamiriszanforlinolmosfernandes': (u'Tamiris Zanforlin Olmos', u'Fernandes'),
 u'negusuworku': (u'Negusu', u'Worku'),
 u'edwardseidman': (u'Edward', u'Seidman'),
 u'manuelsoto': (u'Manuel', u'Soto'),
 u'hallahelgad\xf3ttir': (u'Halla', u'Helgad\xf3ttir'),
 u'mihakrofel': (u'Miha', u'Krofel'),
 u'rosscranston': (u'Ross', u'Cranston'),
 u'm.arantxacolchero': (u'M. Arantxa', u'Colchero'),
 u'lazarousmbulo': (u'Lazarous', u'Mbulo'),
 u'lucindam.hilliard': (u'Lucinda M.', u'Hilliard'),
 u'danieldrozdov': (u'Daniel', u'Drozdov'),
 u'natashaluquin': (u'Natasha', u'Luquin'),
 u'm.b\u0142aszczyk': (u'M.', u'B\u0142aszczyk'),
 u'yunhuilu': (u'Yun Hui', u'Lu'),
 u'g\xfcnterjost': (u'G\xfcnter', u'Jost'),
 u'dalanewkitzman': (u'Dalane W', u'Kitzman'),
 u'lindas.williams': (u'Linda S.', u'Williams'),
 u'yousunkim': (u'You Sun', u'Kim'),
 u'i.wong': (u'I.', u'Wong'),
 u'philippelou\xe2pre': (u'Philippe', u'Lou\xe2pre'),
 u'thuythi-thuhuynh': (u'Thuy Thi-Thu', u'Huynh'),
 u'olgaigglessi-markopoulou': (u'Olga', u'Igglessi-Markopoulou'),
 u'istvanjankovics': (u'Istvan', u'Jankovics'),
 u'wilkof.a.r.verbakel': (u'Wilko F. A. R.', u'Verbakel'),
 u'joophermens': (u'Joop', u'Hermens'),
 u'wasifnkhan': (u'Wasif N', u'Khan'),
 u'christinesmoliner': (u'Christine', u'Smoliner'),
 u'manala.rahman': (u'Manal A.', u'Rahman'),
 u'aprila.herbert': (u'April A.', u'Herbert'),
 u'mouhuang': (u'Mou', u'Huang'),
 u'leonardoramos': (u'Leonardo', u'Ramos'),
 u'alpaydinalp': (u'Alp', u'Aydinalp'),
 u'vrosti': (u'V', u'Rosti'),
 u'katjae.wartenberg': (u'Katja E.', u'Wartenberg'),
 u'camilacespedes': (u'Camila', u'Cespedes'),
 u'josl.m.l.lenoble': (u'Jos L.M.L.', u'le Noble'),
 u'shin-ichinakanuma': (u'SHIN-ICHI', u'NAKANUMA'),
 u'rogeriofaustinoribeiro': (u'Rogerio Faustino', u'Ribeiro'),
 u'k.zeppenfeld': (u'K.', u'Zeppenfeld'),
 u'annabonini': (u'Anna', u'Bonini'),
 u'fantasiby': (u'Fanta', u'Siby'),
 u'amiramili': (u'Amira', u'Mili'),
 u'yoshifumibeck': (u'Yoshifumi', u'Beck'),
 u'kaih\xf6ffner': (u'Kai', u'H\xf6ffner'),
 u'ricardoluizdantasmachado': (u'Ricardo Luiz Dantas', u'Machado'),
 u'ritanyilas': (u'Rita', u'Nyilas'),
 u'annaotlewska': (u'Anna', u'Otlewska'),
 u'khaledo.alsaad': (u'Khaled O.', u'Alsaad'),
 u'marijam.petrinovic': (u'Marija M.', u'Petrinovic'),
 u'm.rama': (u'M.', u'Rama'),
 u'yumurao': (u'Yu', u'Murao'),
 u'konstantinospanagiotellis': (u'Konstantinos', u'Panagiotellis'),
 u'randalpbabiuk': (u'Randal P', u'Babiuk'),
 u'ziadelkhatib': (u'Ziad El', u'Khatib'),
 u'hinanoteavaimurphy': (u'Hinano Teavai', u'Murphy'),
 u'shahramheshmat': (u'Shahram', u'Heshmat'),
 u'shunghanchoi': (u'Shung Han', u'Choi'),
 u'm.ramu': (u'M.', u'Ramu'),
 u'kellyormond': (u'Kelly', u'Ormond'),
 u'hodakhoshbakht': (u'Hoda', u'Khoshbakht'),
 u'haraldguendel': (u'Harald', u'Guendel'),
 u'michellel.hamilton': (u'Michelle L.', u'Hamilton'),
 u'marcelop.barros': (u'Marcelo P.', u'Barros'),
 u'sharonoswald': (u'Sharon', u'Oswald'),
 u'dianel.larson': (u'Diane L.', u'Larson'),
 u'ilariadelprincipe': (u'Ilaria', u'Del Principe'),
 u'ninahoque': (u'Nina', u'Hoque'),
 u'\xe9ricvignon': (u'\xc9ric', u'Vignon'),
 u'lauramacdougall': (u'Laura', u'MacDougall'),
 u'niluferersan': (u'Nilufer', u'Ersan'),
 u'aerinyoon': (u'Aerin', u'Yoon'),
 u'dittedemontis': (u'Ditte', u'Demontis'),
 u'sheydashaafi': (u'Sheyda', u'Shaafi'),
 u'silviafragoeiro': (u'Silvia', u'Fragoeiro'),
 u'albertoboffi': (u'Alberto', u'Boffi'),
 u'jackm.loomis': (u'Jack M.', u'Loomis'),
 u'erisatomita': (u'Erisa', u'Tomita'),
 u'mstallman': (u'M S', u'Tallman'),
 u'birgittewalgreen': (u'Birgitte', u'Walgreen'),
 u'marcobaruffi': (u'Marco', u'Baruffi'),
 u'rsch\xf6nenberger': (u'R', u'Sch\xf6nenberger'),
 u'tom\xe1\u0161frant\xedk': (u'Tom\xe1\u0161', u'Frant\xedk'),
 u'xiao-rongzeng': (u'Xiao-rong', u'Zeng'),
 u'jaapkeijer': (u'Jaap', u'Keijer'),
 u'ronc.mease': (u'Ron C.', u'Mease'),
 u'setsukomorinaka': (u'Setsuko', u'Morinaka'),
 u'juanacedano': (u'Juan A', u'Cedano'),
 u'decaijin': (u'Decai', u'Jin'),
 u'alexbalani': (u'Alex', u'Balani'),
 u'albandrialfraidi': (u'Albandri', u'Alfraidi'),
 u'karinsievwright': (u'Karin', u'Sievwright'),
 u'gauravgandhi': (u'Gaurav', u'Gandhi'),
 u'qinzhilee': (u'Qin Zhi', u'Lee'),
 u'ruiliang': (u'Rui', u'Liang'),
 u'sukwanhandali': (u'Sukwan', u'Handali'),
 u'elifi.ekinci': (u'Elif I.', u'Ekinci'),
 u'j\xe9r\xf4meurbain': (u'J\xe9r\xf4me', u'Urbain'),
 u'sheng-fuyou': (u'Sheng-Fu', u'You'),
 u'asafbachrach': (u'Asaf', u'Bachrach'),
 u'alit.al-hinai': (u'Ali T.', u'Al-Hinai'),
 u'rosariopoy': (u'Rosario', u'Poy'),
 u'zhengangtang': (u'Zhengang', u'Tang'),
 u'hsin-hsiunghuang': (u'Hsin-Hsiung', u'Huang'),
 u'delelegnyilmagebremichael': (u'Delelegn Yilma', u'Gebremichael'),
 u'jackt.c.davis': (u'Jack T.C.', u'Davis'),
 u'krittantanarat': (u'Krit', u'Tantanarat'),
 u'marya.asirifi': (u'Mary A.', u'Asirifi'),
 u'emanabdelzaher': (u'Eman', u'Abdelzaher'),
 u'diegoarcelli': (u'Diego', u'Arcelli'),
 u'alihamadanchi': (u'Ali', u'Hamadanchi'),
 u'woonyongkwon': (u'Woon Yong', u'Kwon'),
 u'sung-pyolee': (u'Sung-Pyo', u'Lee'),
 u'dimitragkika': (u'Dimitra', u'Gkika'),
 u'panagiotiszis': (u'Panagiotis', u'Zis'),
 u'wekaufmann': (u'W E', u'Kaufmann'),
 u'alanlyons': (u'Alan', u'Lyons'),
 u'valenzhuoyouyu': (u'Valen Zhuoyou', u'Yu'),
 u'jonathanstrong': (u'Jonathan', u'Strong'),
 u'maral.mberira': (u'Mara L.', u'Mberira'),
 u'nadezdanbabenko': (u'Nadezda N', u'Babenko'),
 u'abdullaha.al-mishari': (u'Abdullah A.', u'Al-Mishari'),
 u'kirstenrichter': (u'Kirsten', u'Richter'),
 u'jordandevaan': (u'Jordan', u'de Vaan'),
 u'nuriagago-l\xf3pez': (u'Nuria', u'Gago-L\xf3pez'),
 u'ericy.zhang': (u'Eric Y.', u'Zhang'),
 u'juanchen': (u'Juan', u'Chen'),
 u'araevuori': (u'A', u'Raevuori'),
 u'roger\xa0l.williams': (u'Roger\xa0L.', u'Williams'),
 u'andreapi\xe9e-staffa': (u'Andrea', u'Pi\xe9e-Staffa'),
 u'camerongreen': (u'Cameron', u'Green'),
 u'beatrizmorancho': (u'Beatriz', u'Morancho'),
 u'brunobetschart': (u'Bruno', u'Betschart'),
 u'hideyukiando': (u'Hideyuki', u'Ando'),
 u'ssahay': (u'S', u'Sahay'),
 u'alisteru.nicol': (u'Alister U.', u'Nicol'),
 u'xi-mingzhang': (u'XI-MING', u'ZHANG'),
 u'r.peravali': (u'R.', u'Peravali'),
 u'barbarauznanska-loch': (u'Barbara', u'Uznanska-Loch'),
 u'danieltcarr': (u'Daniel T', u'Carr'),
 u'irenemiguel-escalada': (u'Irene', u'Miguel-Escalada'),
 u'khaldunm.alazzam': (u'Khaldun M.', u'Al Azzam'),
 u'muyebseo': (u'Mu Yeb', u'Seo'),
 u'hongyangtang': (u'Hongyang', u'Tang'),
 u'sabujacob': (u'Sabu', u'Jacob'),
 u'wenguangyin': (u'Wenguang', u'Yin'),
 u'michaelshill': (u'Michael S', u'Hill'),
 u'aliazhardawasaz': (u'Ali Azhar', u'Dawasaz'),
 u's.m.sim': (u'S. M.', u'Sim'),
 u'emmanuelledeniaud': (u'Emmanuelle', u'Deniaud'),
 u'geruzaalvesdasilva': (u'Geruza Alves', u'da Silva'),
 u'javiercalzada': (u'Javier', u'Calzada'),
 u'ritastangl': (u'Rita', u'Stangl'),
 u'andreatironi': (u'Andrea', u'Tironi'),
 u'pradnyap.kanekar': (u'Pradnya P.', u'Kanekar'),
 u'jwaldmann': (u'J', u'Waldmann'),
 u'norihikotsuchiya': (u'Norihiko', u'Tsuchiya'),
 u'barbaraulmasov': (u'Barbara', u'Ulmasov'),
 u'adamkolondra': (u'Adam', u'Kolondra'),
 u'shailvyas': (u'Shail', u'Vyas'),
 u'g.j.frey': (u'G.J.', u'Frey'),
 u'waynen.mathis': (u'Wayne N.', u'Mathis'),
 u'davidmaboulafia': (u'David M', u'Aboulafia'),
 u'davidm.whiley': (u'David M.', u'Whiley'),
 u'louisemarsh': (u'Louise', u'Marsh'),
 u'weelinwong': (u'Wee', u'Lin Wong'),
 u'nian-songwang': (u'Nian-song', u'Wang'),
 u'adri\xe1nsantana-ramirez': (u'Adri\xe1n', u'SANTANA-RAMIREZ'),
 u'georgiosodimitracopoulos': (u'Georgios O', u'Dimitracopoulos'),
 u'h.bando': (u'H.', u'Bando'),
 u'kristenl.votruba': (u'Kristen L.', u'Votruba'),
 u'katrinalwatson': (u'Katrina L', u'Watson'),
 u'joycedestoppelaar': (u'Joyce', u'de Stoppelaar'),
 u'jun-yakato': (u'Jun-ya', u'Kato'),
 u'hansj.dehaas': (u'Hans J.', u'de Haas'),
 u'akimasahirata': (u'Akimasa', u'Hirata'),
 u'brijendrak.tiwari': (u'Brijendra K.', u'Tiwari'),
 u'robertp.erickson': (u'Robert P.', u'Erickson'),
 u'lejlamutapcicvajzovic': (u'Lejla Mutapcic', u'Vajzovic'),
 u'juanj.caballero-novella': (u'Juan J.', u'Caballero-Novella'),
 u'karlskretting': (u'Karl', u'Skretting'),
 u'chang-jinchoi': (u'Chang-Jin', u'Choi'),
 u'christophsohn': (u'Christoph', u'Sohn'),
 u'sadafkhan': (u'Sadaf', u'Khan'),
 u'lelbarghati': (u'L', u'Elbarghati'),
 u'julianepps': (u'Julian', u'Epps'),
 u'asadimian': (u'Asad I', u'Mian'),
 u'laurencedelacroix': (u'Laurence', u'Delacroix'),
 u'arlend.hanssen': (u'Arlen D.', u'Hanssen'),
 u'y.niida': (u'Y.', u'Niida'),
 u'agnieszkademczuk': (u'Agnieszka', u'Demczuk'),
 u'timothyj.garrett': (u'Timothy J.', u'Garrett'),
 u'gtschurtschenthaler': (u'G', u'Tschurtschenthaler'),
 u'pieterh.kitslaar': (u'Pieter H.', u'Kitslaar'),
 u'pu-hongzhang': (u'Pu-Hong', u'Zhang'),
 u'ant\xf3niopedrocachorodrigues': (u'Ant\xf3nio Pedro Cacho', u'Rodrigues'),
 u'cnmanjunath': (u'CN', u'Manjunath'),
 u'asirvathama.robert': (u'Asirvatham A.', u'Robert'),
 u'katjasremlinger': (u'Katja S', u'Remlinger'),
 u'yasuhirotsutani': (u'YASUHIRO', u'TSUTANI'),
 u'amolnanasahebwagh': (u'Amol Nanasaheb', u'Wagh'),
 u'na\xedlac.donascimento': (u'Na\xedla C.', u'do Nascimento'),
 u'anushasinha': (u'Anusha', u'Sinha'),
 u'hidekinarimatsu': (u'Hideki', u'Narimatsu'),
 u'yasuyukimizuno': (u'Yasuyuki', u'Mizuno'),
 u'maragladstone': (u'Mara', u'Gladstone'),
 u'alexl.kolodkin': (u'Alex L.', u'Kolodkin'),
 u'zinmarlwin': (u'Zin Mar', u'Lwin'),
 u'guanyangzou': (u'Guanyang', u'Zou'),
 u'shu-minhuang': (u'Shu-Min', u'Huang'),
 u'majlindalako': (u'Majlinda', u'Lako'),
 u'carmems.fontanetti': (u'Carmem S.', u'Fontanetti'),
 u'p.w.davenport': (u'P. W.', u'Davenport'),
 u'simonekauffeld': (u'Simone', u'Kauffeld'),
 u'farahtasnim': (u'Farah', u'Tasnim'),
 u'j.m.reynard': (u'J. M.', u'Reynard'),
 u'danielachieffo': (u'Daniela', u'Chieffo'),
 u'fudenghuang': (u'Fudeng', u'Huang'),
 u'dingenalvalstar': (u'Dingena L', u'Valstar'),
 u'galenshi': (u'Galen', u'Shi'),
 u'h.banda': (u'H.', u'Banda'),
 u'jos\xe9augustobarreto-filho': (u'Jos\xe9 Augusto', u'Barreto-Filho'),
 u'annekejvanderkooi': (u'Anneke J', u'van der Kooi'),
 u'pawe\u0142rog\xf3\u017c': (u'Pawe\u0142', u'Rog\xf3\u017c'),
 u'hamedamini': (u'Hamed', u'Amini'),
 u'patr\xedciacsantos': (u'Patr\xedcia C', u'Santos'),
 u'stanblock': (u'Stan', u'Block'),
 u'arvasavada': (u'A R', u'Vasavada'),
 u'marcelojucorrea': (u'Marcelo JU', u'Correa'),
 u'lindam.wakim': (u'Linda M.', u'Wakim'),
 u'guoyuduan': (u'Guoyu', u'Duan'),
 u'yamimaosher': (u'Yamima', u'Osher'),
 u'micheleserri': (u'Michele', u'Serri'),
 u'daviddavis': (u'David', u'Davis'),
 u'jamiesui-lamkwok': (u'Jamie Sui-Lam', u'Kwok'),
 u'jonathanm.mcgavock': (u'Jonathan M.', u'McGavock'),
 u'vikramkishorenavil': (u'Vikram Kishore', u'Navil'),
 u'sierracolavito': (u'Sierra', u'Colavito'),
 u'michaelg.rosenberg': (u'Michael G.', u'Rosenberg'),
 u'christellevasnier': (u'Christelle', u'Vasnier'),
 u'paolakoenen': (u'Paola', u'Koenen'),
 u'jamesa.rusak': (u'James A.', u'Rusak'),
 u'robertburkes': (u'Robert', u'Burkes'),
 u'g.scottgazelle': (u'G. Scott', u'Gazelle'),
 u'r.kemkemer': (u'R.', u'Kemkemer'),
 u'nathana.hotaling': (u'Nathan A.', u'Hotaling'),
 u'jean-paulcadoret': (u'Jean-Paul', u'Cadoret'),
 u'malba': (u'M', u'Alba'),
 u'bernardpeers': (u'Bernard', u'Peers'),
 u'kristinacasteels': (u'Kristina', u'Casteels'),
 u'jameswalugembe': (u'James', u'Walugembe'),
 u'dawnolson': (u'Dawn', u'Olson'),
 u'evgeniavdolgova': (u'Evgenia V', u'Dolgova'),
 u'sakurakouozu': (u'Sakurako', u'Uozu'),
 u'rebeccaslevin': (u'Rebecca S', u'Levin'),
 u'malbu': (u'M', u'Albu'),
 u'yvettegarbe': (u'Yvette', u'Garbe'),
 u'renaudwagner': (u'Renaud', u'Wagner'),
 u'j.poettgen': (u'J.', u'Poettgen'),
 u'sbaran': (u'S', u'Baran'),
 u'annydewilde': (u'Anny', u'Dewilde'),
 u'michaele.bekier': (u'Michael E.', u'Bekier'),
 u'dheerajkalladka': (u'Dheeraj', u'Kalladka'),
 u'perflisberg': (u'Per', u'Flisberg'),
 u'alexandralubina-solomon': (u'Alexandra', u'Lubina-Solomon'),
 u'anna-sarakr\xe5ng': (u'Anna-Sara', u'Kr\xe5ng'),
 u'masafumiumekage': (u'Masafumi', u'Umekage'),
 u'g.yosipovitch': (u'G.', u'Yosipovitch'),
 u'javiera.men\xe9ndez': (u'Javier A.', u'Men\xe9ndez'),
 u'j.w.lee': (u'J. W.', u'Lee'),
 u'elizabethc.oesterle': (u'Elizabeth C.', u'Oesterle'),
 u'karelk-jkuiper': (u'Karel K-J', u'Kuiper'),
 u'hmdavey': (u'H M', u'Davey'),
 u'christiangeraut': (u'Christian', u'Geraut'),
 u'kbennett': (u'K', u'Bennett'),
 u'sabihaabekhoukh': (u'Sabiha', u'Abekhoukh'),
 u'ornellamassa': (u'Ornella', u'Massa'),
 u'wernerhosemann': (u'Werner', u'Hosemann'),
 u'xuminwang': (u'Xumin', u'Wang'),
 u'chang-zhicai': (u'Chang-Zhi', u'Cai'),
 u'judsonaward': (u'Judson A', u'Ward'),
 u'chen-yenchien': (u'Chen-Yen', u'Chien'),
 u'emmanuellebouzigon': (u'Emmanuelle', u'Bouzigon'),
 u'claramartin': (u'Clara', u'Martin'),
 u'rosanadecarvalhocruz': (u'Rosana de Carvalho', u'Cruz'),
 u'yongyeonjeong': (u'Yong Yeon', u'Jeong'),
 u'auemduanprawan': (u'Auemduan', u'Prawan'),
 u'jackiebye': (u'Jackie', u'Bye'),
 u'rnazarian': (u'R', u'Nazarian'),
 u'y.acremann': (u'Y.', u'Acremann'),
 u'davidjamor': (u'David J', u'Amor'),
 u'briank.walker': (u'Brian K.', u'Walker'),
 u'davidc.riccio': (u'David C.', u'Riccio'),
 u'josephfschad': (u'Joseph F', u'Schad'),
 u'anttisarela': (u'Antti', u'Sarela'),
 u'marcaattiyeh': (u'Marc A', u'Attiyeh'),
 u'octaviocarvajal-zarrabal': (u'Octavio', u'Carvajal-Zarrabal'),
 u'v.glenntarcea': (u'V. Glenn', u'Tarcea'),
 u'tomonobukanasugi': (u'Tomonobu', u'Kanasugi'),
 u'mcemcfadyen': (u'M C E', u'McFadyen'),
 u'patrickkarcher': (u'Patrick', u'Karcher'),
 u'yinclin': (u'Yin C', u'Lin'),
 u'jamesb.uney': (u'James B.', u'Uney'),
 u'lancea.stechschulte': (u'Lance A.', u'Stechschulte'),
 u'ghimsiongow': (u'Ghim Siong', u'Ow'),
 u'aimonk.alkanani': (u'Aimon K.', u'Alkanani'),
 u'eduardofern\xe1ndez-cruz': (u'Eduardo', u'Fern\xe1ndez-Cruz'),
 u'thomash.darrah': (u'Thomas H.', u'Darrah'),
 u'jian-boxie': (u'Jian-Bo', u'Xie'),
 u'benjaminb.green': (u'Benjamin B.', u'Green'),
 u'roberthawes': (u'Robert', u'Hawes'),
 u'maryfroberts': (u'Mary F', u'Roberts'),
 u'josephe.perales': (u'Joseph E.', u'Perales'),
 u'de-changli': (u'De-Chang', u'Li'),
 u'jessicapalmer': (u'Jessica', u'Palmer'),
 u'hichamlahlou': (u'Hicham', u'Lahlou'),
 u'josipbegovac': (u'Josip', u'Begovac'),
 u'takeshiohta': (u'Takeshi', u'Ohta'),
 u'akirasassa': (u'Akira', u'Sassa'),
 u'j.t.paz': (u'J. T.', u'Paz'),
 u'shu-minyang': (u'Shu-Min', u'Yang'),
 u'chih-zenchang': (u'Chih-Zen', u'Chang'),
 u'kathys.evans': (u'Kathy S.', u'Evans'),
 u'mariajos\xe8sisalli': (u'Maria Jos\xe8', u'Sisalli'),
 u'aprilwatanabe': (u'April', u'Watanabe'),
 u'francescotaus': (u'Francesco', u'Taus'),
 u'brittee.lowther': (u'Britte E.', u'Lowther'),
 u'd.l.robinson': (u'D. L.', u'Robinson'),
 u's\xf3nnicagal\xe1n-gil': (u'S\xf3nnica', u'Gal\xe1n-Gil'),
 u'alfredoraglio': (u'Alfredo', u'Raglio'),
 u'a.dziurda': (u'A.', u'Dziurda'),
 u'toshiyamanabe': (u'Toshiya', u'Manabe'),
 u'carolineschluth-bolard': (u'Caroline', u'Schluth-Bolard'),
 u'susanfischer': (u'Susan', u'Fischer'),
 u'manuelacalder\xf3n': (u'Manuela', u'Calder\xf3n'),
 u'kgreiner': (u'K', u'Greiner'),
 u'myong-jinkang': (u'Myong-Jin', u'Kang'),
 u'albertogianinetti': (u'Alberto', u'Gianinetti'),
 u'christodoulosistefanadis': (u'Christodoulos I', u'Stefanadis'),
 u'maciejurbaniak': (u'Maciej', u'Urbaniak'),
 u'shekharmallick': (u'Shekhar', u'Mallick'),
 u'jen-hotseng': (u'Jen-Ho', u'Tseng'),
 u'alexanderi.f.simpson': (u'Alexander I. F.', u'Simpson'),
 u'joseluismota-rodriguez': (u'Jose Luis', u'Mota-Rodriguez'),
 u'ga\xebllediserens': (u'Ga\xeblle', u'Diserens'),
 u'shinagawayoko': (u'Shinagawa', u'Yoko'),
 u'svenmagnuscarlsen': (u'Sven Magnus', u'Carlsen'),
 u'michaelgsboylan': (u'Michael GS', u'Boylan'),
 u'michelejmaiers': (u'Michele J', u'Maiers'),
 u'lisagoebel': (u'Lisa', u'Goebel'),
 u'helleholmhansson': (u'Helle Holm', u'Hansson'),
 u'nikolaos\xa0p.mastroyiannopoulos': (u'Nikolaos\xa0P.',
  u'Mastroyiannopoulos'),
 u'harshdhar': (u'Harsh', u'Dhar'),
 u'jemalmhamid': (u'Jemal M', u'Hamid'),
 u'fatmasarac': (u'Fatma', u'Sarac'),
 u'deborahhung': (u'Deborah', u'Hung'),
 u'christoskrogias': (u'Christos', u'Krogias'),
 u'jitendramangwani': (u'Jitendra', u'Mangwani'),
 u'dyonimatiasdeoliveira': (u'Dyoni Matias', u'de Oliveira'),
 u'toshironagasawa': (u'Toshiro', u'Nagasawa'),
 u'byoungheonkang': (u'Byoung Heon', u'Kang'),
 u'ievgeniiaa.tiukova': (u'Ievgeniia A.', u'Tiukova'),
 u'brianrbarrows': (u'Brian R', u'Barrows'),
 u'rossanavermiglio': (u'Rossana', u'Vermiglio'),
 u'b\xe9lapapp': (u'B\xe9la', u'Papp'),
 u'changgookang': (u'Chang Goo', u'Kang'),
 u'j.decolongon': (u'J.', u'Decolongon'),
 u'sabinehummert': (u'Sabine', u'Hummert'),
 u'a.a.khajetoorians': (u'A. A.', u'Khajetoorians'),
 u'cliffs.han': (u'Cliff S.', u'Han'),
 u'anne-meretesoja': (u'Anne-Merete', u'Soja'),
 u'mirrej.p.simons': (u'Mirre J. P.', u'Simons'),
 u'somagupta': (u'Soma', u'Gupta'),
 u'ewasiwak': (u'Ewa', u'Siwak'),
 u'iliastsiflikas': (u'Ilias', u'Tsiflikas'),
 u'kenichitakeshita': (u'Kenichi', u'Takeshita'),
 u'wouteradreschler': (u'Wouter A', u'Dreschler'),
 u'claudiozuniga': (u'Claudio', u'Zuniga'),
 u'toniarussell': (u'Tonia', u'Russell'),
 u'jiankuang': (u'Jian', u'Kuang'),
 u'apostolossarivalasis': (u'Apostolos', u'Sarivalasis'),
 u'liang-shunyou': (u'Liang-shun', u'You'),
 u'samueljarbes': (u'Samuel J', u'Arbes'),
 u'magalibouhours': (u'Magali', u'Bouhours'),
 u'leermobley': (u'Lee R', u'Mobley'),
 u'berislavlisnic': (u'Berislav', u'Lisnic'),
 u'jassirwitta': (u'Jassir', u'Witta'),
 u'azlarabmasrar': (u'Azlarab', u'Masrar'),
 u'yangchunliu': (u'Yangchun', u'Liu'),
 u'mavisli': (u'Mavis', u'Li'),
 u'gilesn.johnson': (u'Giles N.', u'Johnson'),
 u'lawrencef.pupulim': (u'Lawrence F.', u'Pupulim'),
 u'juliaanderson': (u'Julia', u'Anderson'),
 u'christophera.maloney': (u'Christopher A.', u'Maloney'),
 u'paulinam.dominiak': (u'Paulina M.', u'Dominiak'),
 u'washingtonlsvieira': (u'Washington LS', u'Vieira'),
 u'rebeccah.stoloff': (u'Rebecca H.', u'Stoloff'),
 u'hosseinkhalili': (u'Hossein', u'Khalili'),
 u'amandaldauphinee': (u'Amanda L', u'Dauphinee'),
 u'guangpingwang': (u'Guangping', u'Wang'),
 u'arnoldg.e.leenders': (u'Arnold G. E.', u'Leenders'),
 u'c.v.harinarayan': (u'C. V.', u'Harinarayan'),
 u'abdullahal-shimemeri': (u'Abdullah', u'Al-Shimemeri'),
 u'aryojamshidpey': (u'Aryo', u'Jamshidpey'),
 u'albertjbecker': (u'Albert J', u'Becker'),
 u'evanthiadiamanti-kandarakis': (u'Evanthia', u'Diamanti-Kandarakis'),
 u'howardsfox': (u'Howard S', u'Fox'),
 u'oliviersallou': (u'Olivier', u'Sallou'),
 u'anne-mettehaase': (u'Anne-Mette', u'Haase'),
 u'sigridhoyer-fender': (u'Sigrid', u'Hoyer-Fender'),
 u'nicolaasg.jaspers': (u'Nicolaas G.', u'Jaspers'),
 u'rienishitani': (u'Rie', u'Nishitani'),
 u'annae.barmintseva': (u'Anna E.', u'Barmintseva'),
 u'vleberre': (u'V', u'Le Berre'),
 u'motohirowakui': (u'Motohiro', u'Wakui'),
 u'cl\xedmacocano': (u'Cl\xedmaco', u'Cano'),
 u'angeldelgado': (u'Angel', u'Delgado'),
 u'davidw.cuthbertson': (u'David W.', u'Cuthbertson'),
 u'douglaswhite': (u'Douglas', u'White'),
 u'mwihakikimura': (u'Mwihaki', u'Kimura'),
 u'matijazupan': (u'Matija', u'Zupan'),
 u'evawalla': (u'Eva', u'Walla'),
 u'hiroshiokubo': (u'Hiroshi', u'Okubo'),
 u'lukec.kingry': (u'Luke C.', u'Kingry'),
 u'grzegorzkreiner': (u'Grzegorz', u'Kreiner'),
 u'chimingjin': (u'Chiming', u'Jin'),
 u'j.khadake': (u'J.', u'Khadake'),
 u'ryokokishi': (u'Ryoko', u'Kishi'),
 u'jerelm.ezell': (u'Jerel M.', u'Ezell'),
 u'amalaal-rusaiess': (u'Amal A', u'Al-Rusaiess'),
 u'johannesvonlintig': (u'Johannes', u'von Lintig'),
 u'randallgriffith': (u'Randall', u'Griffith'),
 u'kennethfearon': (u'Kenneth', u'Fearon'),
 u'yisiangng': (u'Yi Siang', u'Ng'),
 u'nathaliemandjee': (u'Nathalie', u'Mandjee'),
 u'e.benizeau': (u'E.', u'Benizeau'),
 u'ovidiuhoreabedreag': (u'Ovidiu Horea', u'Bedreag'),
 u'paulmeredith': (u'Paul', u'Meredith'),
 u'mustafabasbozkurt': (u'Mustafa', u'Basbozkurt'),
 u'monikarajkowska': (u'Monika', u'Rajkowska'),
 u'douglas\nm.jacobsen': (u'Douglas\nM.', u'Jacobsen'),
 u'antonioalaimo': (u'Antonio', u'Alaimo'),
 u'dae-heehan': (u'Dae-Hee', u'Han'),
 u'syedsultanbeevi': (u'Syed Sultan', u'Beevi'),
 u'tiagosantos': (u'Tiago', u'Santos'),
 u'brianfoxwell': (u'Brian', u'Foxwell'),
 u'janetacurran': (u'Janet A', u'Curran'),
 u'ginov.limmon': (u'Gino V.', u'Limmon'),
 u'jamesc.lin': (u'James C.', u'Lin'),
 u'geraldinekong': (u'Geraldine', u'Kong'),
 u'jantachezy': (u'Jan', u'Tachezy'),
 u'luissantom\xe9-collazo': (u'Luis', u'Santom\xe9-Collazo'),
 u'vancebeck': (u'Vance', u'Beck'),
 u'alessandracarbone': (u'Alessandra', u'Carbone'),
 u'nurias\xe1nchez': (u'Nuria', u'S\xe1nchez'),
 u'naomischotte': (u'Naomi SC', u'Hotte'),
 u'mohammedy.hassan': (u'Mohammed Y.', u'Hassan'),
 u'hjpurohit': (u'HJ', u'Purohit'),
 u'kariniproper': (u'Karin I', u'Proper'),
 u'dongseobtark': (u'Dongseob', u'Tark'),
 u'kathrynm.szczotka': (u'Kathryn M.', u'Szczotka'),
 u'nicolasabreu': (u'Nicolas', u'Abreu'),
 u'leandros.oliveira': (u'Leandro S.', u'Oliveira'),
 u'johangorgasbrun': (u'Johan Gorgas', u'Brun'),
 u'x.d.tao': (u'X. D.', u'Tao'),
 u'edatan': (u'E', u'Datan'),
 u'hisatakanuma': (u'Hisataka', u'Numa'),
 u'jaykumarrangani': (u'Jaykumar', u'Rangani'),
 u'adiljankader': (u'Adiljan', u'Kader'),
 u'katsuyukimaki': (u'Katsuyuki', u'Maki'),
 u'janeliebelt': (u'Jan E', u'Liebelt'),
 u'johannesechterhoff': (u'Johannes', u'Echterhoff'),
 u'hideotsuji': (u'Hideo', u'Tsuji'),
 u'j\xe9r\xf4meferet': (u'J\xe9r\xf4me', u'Feret'),
 u'c\xe9liapais': (u'C\xe9lia', u'Pais'),
 u'johnm.tokish': (u'John M.', u'Tokish'),
 u'k.lakshmi': (u'K.', u'Lakshmi'),
 u'danielwfults': (u'Daniel W', u'Fults'),
 u'christinascherer': (u'Christina', u'Scherer'),
 u'juliebarnett': (u'Julie', u'Barnett'),
 u'sindidiko': (u'Sindi', u'Diko'),
 u'xiu-chengjiao': (u'Xiu-cheng', u'Jiao'),
 u'karitveteinngjerdingen': (u'Kari Tvete', u'Inngjerdingen'),
 u'shefkixharra': (u'Shefki', u'Xharra'),
 u'stevend.siciliano': (u'Steven D.', u'Siciliano'),
 u'bert-janfvanbeijnum': (u'Bert-Jan F', u'van Beijnum'),
 u'mariadsifaki': (u'Maria D', u'Sifaki'),
 u'samuelk.kutty': (u'Samuel K.', u'Kutty'),
 u'lucasliepert': (u'Lucas', u'Liepert'),
 u'annleechang': (u'Ann Lee', u'Chang'),
 u'phillippam.cumberland': (u'Phillippa M.', u'Cumberland'),
 u'dirkrvanbockstaele': (u'Dirk R', u'Van Bockstaele'),
 u'junichirojameskazama': (u'Junichiro James', u'Kazama'),
 u'joongsubchoi': (u'Joong Sub', u'Choi'),
 u'mcrobson': (u'MC', u'Robson'),
 u'federicodalbello': (u'Federico Dal', u'Bello'),
 u'josepenalvo': (u'Jose', u'Penalvo'),
 u'leonidlmoroz': (u'Leonid L', u'Moroz'),
 u'nataliestratton': (u'Natalie', u'Stratton'),
 u'marialourdeseamarillo': (u'Maria Lourdes E', u'Amarillo'),
 u'jamesh.campbell': (u'James H.', u'Campbell'),
 u'christopha.haselwandter': (u'Christoph A.', u'Haselwandter'),
 u'adamg.tabak': (u'Adam G.', u'Tabak'),
 u'simonegrisan': (u'Simone', u'Grisan'),
 u'carolinelwatkins': (u'Caroline L', u'Watkins'),
 u'robertc.upstill-goddard': (u'Robert C.', u'Upstill-Goddard'),
 u'miekeh.f.grypdonck': (u'Mieke H. F.', u'Grypdonck'),
 u'juwariamulla': (u'Juwaria', u'Mulla'),
 u'mariamcampos': (u'Maria M', u'Campos'),
 u'martinposp\xed\u0161ek': (u'Martin', u'Posp\xed\u0161ek'),
 u'namalperera': (u'Namal', u'Perera'),
 u'sophieswwang': (u'Sophie SW', u'Wang'),
 u'mayacesari': (u'Maya', u'Cesari'),
 u'alabia.okunola': (u'Alabi A.', u'Okunola'),
 u'michinorimatsumoto': (u'Michinori', u'Matsumoto'),
 u'jacekszopinski': (u'Jacek', u'Szopinski'),
 u'adinayheilbrunn-lang': (u'Adina Y', u'Heilbrunn-Lang'),
 u'yurihibino': (u'Yuri', u'Hibino'),
 u'helenk.delichatsios': (u'Helen K.', u'Delichatsios'),
 u'jenniferburns': (u'Jennifer', u'Burns'),
 u'justinvlouis': (u'Justin V', u'Louis'),
 u'qi-binghuang': (u'Qi-Bing', u'Huang'),
 u'hsin-iliao': (u'Hsin-I', u'Liao'),
 u'heikelux': (u'Heike', u'Lux'),
 u'vincenzomigliaccio': (u'Vincenzo', u'Migliaccio'),
 u'faridahhanimshakirin': (u'Faridah Hanim', u'Shakirin'),
 u'l.m.vaanholt': (u'L. M.', u'Vaanholt'),
 u'smsheehan': (u'SM', u'Sheehan'),
 u'chloemorris': (u'Chloe', u'Morris'),
 u'jiyongjung': (u'Ji Yong', u'Jung'),
 u'elizabethfrancesbowen': (u'Elizabeth Frances', u'Bowen'),
 u'jacobkarsh': (u'Jacob', u'Karsh'),
 u'williamhkitchens': (u'William H', u'Kitchens'),
 u'nancyyanzhu': (u'Nancy', u'Yan Zhu'),
 u'alisham.mendonsa': (u'Alisha M.', u'Mendonsa'),
 u'thomasroskoden': (u'Thomas', u'Roskoden'),
 u'danielm.morobadi': (u'Daniel M.', u'Morobadi'),
 u'yu-kyungkim': (u'Yu-Kyung', u'Kim'),
 u'pujayadav': (u'Puja', u'Yadav'),
 u'ju-huawang': (u'Ju-Hua', u'Wang'),
 u'theresekardakis': (u'Therese', u'Kardakis'),
 u'bettym.drees': (u'Betty M.', u'Drees'),
 u'jing-weichi': (u'Jing-Wei', u'Chi'),
 u'celiamariadealmeidasoares': (u'Celia Maria de Almeida', u'Soares'),
 u'dong-meiwang': (u'Dong-Mei', u'Wang'),
 u'guohuiwang': (u'Guohui', u'Wang'),
 u'katsutoshimiura': (u'Katsutoshi', u'Miura'),
 u'hakanyi\u011fitba\u015f': (u'Hakan', u'Yi\u011fitba\u015f'),
 u'montgomerymartin': (u'Montgomery', u'Martin'),
 u'eugeniapapaliodi': (u'Eugenia', u'Papaliodi'),
 u'zaiwanghuang': (u'Zaiwang', u'Huang'),
 u'sawsanfeki': (u'Sawsan', u'Feki'),
 u'ageorgiades': (u'A', u'Georgiades'),
 u'c.deighton': (u'C.', u'Deighton'),
 u'abdulakatakweba': (u'Abdul A', u'Katakweba'),
 u'colleenm.niswender': (u'Colleen M.', u'Niswender'),
 u'deepaknamarapurkar': (u'Deepak N', u'Amarapurkar'),
 u'michaelj.seitz': (u'Michael J.', u'Seitz'),
 u'mathieujeanmaire': (u'Mathieu', u'Jeanmaire'),
 u'rpitakaka': (u'R', u'Pitakaka'),
 u'julieenticknap': (u'Julie', u'Enticknap'),
 u'elizabethkutter': (u'Elizabeth', u'Kutter'),
 u'paul-gerhardschlegel': (u'Paul-Gerhard', u'Schlegel'),
 u'yiklimkok': (u'Yik Lim', u'Kok'),
 u'laurad.lewis': (u'Laura D.', u'Lewis'),
 u'francescoiemolo': (u'Francesco', u'Iemolo'),
 u'jonathanh.teichroeb': (u'Jonathan H.', u'Teichroeb'),
 u'christophwelsch': (u'Christoph', u'Welsch'),
 u'shijojoseph': (u'Shijo', u'Joseph'),
 u'se-chuljeong': (u'Se-Chul', u'Jeong'),
 u'tamakimabuchi': (u'Tamaki', u'Mabuchi'),
 u'carlamoran': (u'Carla', u'Moran'),
 u'silviatorrespedraza': (u'Silvia', u'Torres Pedraza'),
 u'arpadbarath': (u'Arpad', u'Barath'),
 u'hsing-tingyu': (u'Hsing-Ting', u'Yu'),
 u'rowang.walker': (u'Rowan G.', u'Walker'),
 u'takashinakahari': (u'Takashi', u'Nakahari'),
 u'chrisdenning': (u'Chris', u'Denning'),
 u'amayagorostiza': (u'Amaya', u'Gorostiza'),
 u'martinaeschmidt': (u'Martina E', u'Schmidt'),
 u'karthikroykonda': (u'Karthik Roy', u'Konda'),
 u'yuanm.zhou': (u'Yuan M.', u'Zhou'),
 u'takashinakahara': (u'Takashi', u'Nakahara'),
 u'anaf.abraido-lanza': (u'Ana F.', u'Abraido-Lanza'),
 u'doanvannguyen': (u'Doan', u'Van Nguyen'),
 u'sawadboonpiyathad': (u'Sawad', u'Boonpiyathad'),
 u'robertgrosselfinger': (u'Robert', u'Grosselfinger'),
 u'vijayaphanikumaryemparala': (u'Vijayaphanikumar', u'Yemparala'),
 u'irmgardwech': (u'Irmgard', u'Wech'),
 u's.c.chow': (u'S. C.', u'Chow'),
 u'nastasiyaf.grinberg': (u'Nastasiya F.', u'Grinberg'),
 u'andrzejszulc': (u'Andrzej', u'Szulc'),
 u'habacucflores-moreno': (u'Habacuc', u'Flores-Moreno'),
 u'alfredotorreslarios': (u'Alfredo', u'Torres Larios'),
 u'iang.barr': (u'Ian G.', u'Barr'),
 u'abdelmajidbelouchi': (u'Abdelmajid', u'Belouchi'),
 u'audreyruple-czerniak': (u'Audrey', u'Ruple-Czerniak'),
 u'rachelcarmenta': (u'Rachel', u'Carmenta'),
 u'adrianaramos-ordo\xf1ez': (u'Adriana', u'Ramos-Ordo\xf1ez'),
 u'khaledmohamedyounes': (u'Khaled Mohamed', u'Younes'),
 u"sidneyd'mello": (u'Sidney', u"D'Mello"),
 u'minetdewied': (u'Minet', u'de Wied'),
 u'phillipa.mcghee': (u'Phillip A.', u'McGhee'),
 u'evaterzibasi': (u'Eva', u'Terzibasi'),
 u'fengyuluo': (u'Feng Yu', u'Luo'),
 u'elizabethsfrey': (u'Elizabeth S', u'Frey'),
 u'chanele.smart': (u'Chanel E.', u'Smart'),
 u'markd.faries': (u'Mark D.', u'Faries'),
 u'arnaudbourd\xe9': (u'Arnaud', u'Bourd\xe9'),
 u'louc.grothaus': (u'Lou C.', u'Grothaus'),
 u'catherinendungu-case': (u'Catherine', u'Ndungu-Case'),
 u'patriziaangelico': (u'Patrizia', u'Angelico'),
 u'sandracook': (u'Sandra', u'Cook'),
 u'shaider': (u'S', u'Haider'),
 u'su-chilim': (u'Su-Chi', u'Lim'),
 u'davidr.plas': (u'David R.', u'Plas'),
 u'mar\xedag.parra': (u'Mar\xeda G.', u'Parra'),
 u'samantapaltrinieri': (u'Samanta', u'Paltrinieri'),
 u'joohwancha': (u'Joo Hwan', u'Cha'),
 u'piotrdworzynski': (u'Piotr', u'Dworzynski'),
 u'zongchuanlong': (u'Zong', u'Chuanlong'),
 u'ugoantonellogironicarnevale': (u'Ugo Antonello', u'Gironi Carnevale'),
 u'barbaramaccagno': (u'Barbara', u'Maccagno'),
 u'elioraron': (u'Eliora', u'Ron'),
 u'pjhogg': (u'P J', u'Hogg'),
 u'haticeaakdogan': (u'Hatice A', u'Akdogan'),
 u'aparberry': (u'A', u'Parberry'),
 u'lanceobauer': (u'Lance O', u'Bauer'),
 u'cherrilbowman': (u'Cherril', u'Bowman'),
 u'dorotheadjenkins': (u'Dorothea D', u'Jenkins'),
 u'tomp.beresford': (u'Tom P.', u'Beresford'),
 u'antonk.pallua': (u'Anton K.', u'Pallua'),
 u'ivanomenicucci': (u'Ivano', u'Menicucci'),
 u'annalkhandoga': (u'Anna L', u'Khandoga'),
 u'clifforde.soll': (u'Clifford E.', u'Soll'),
 u'ma\u0142gorzatagraczyk': (u'Ma\u0142gorzata', u'Graczyk'),
 u'yu-linhsu': (u'Yu-Lin', u'Hsu'),
 u'stephenmtollman': (u'Stephen M', u'Tollman'),
 u'omart\xednez-maza': (u'O', u'Mart\xednez-Maza'),
 u'lukaszjodko': (u'Lukasz', u'Jodko'),
 u'pierredewit': (u'Pierre', u'De Wit'),
 u'juanmuinelo-lorenzo': (u'Juan', u'Muinelo-Lorenzo'),
 u'junshizhao': (u'Junshi', u'Zhao'),
 u'mkusama': (u'M', u'Kusama'),
 u'peterwatt': (u'Peter', u'Watt'),
 u'renzhiwang': (u'Renzhi', u'Wang'),
 u'davidanorris': (u'David A', u'Norris'),
 u'robertfbulleit': (u'Robert F', u'Bulleit'),
 u'ghulamnabilone': (u'Ghulam Nabi', u'Lone'),
 u'bongokkim': (u'Bong Ok', u'Kim'),
 u'canadykeniscope': (u'Canady', u'Keniscope'),
 u'urbana.kiernan': (u'Urban A.', u'Kiernan'),
 u'marianemstefani': (u'Mariane M', u'Stefani'),
 u'yukikomatsu': (u'Yuki', u'Komatsu'),
 u'seyedrezaraeeskarami': (u'Seyed Reza', u'Raeeskarami'),
 u'linam.vargas': (u'Lina', u'M. Vargas'),
 u'kamarularyffinbaharuddin': (u'Kamarul Aryffin', u'Baharuddin'),
 u'ronaldplishka': (u'Ronald', u'Plishka'),
 u'annpont\xe9n': (u'Ann', u'Pont\xe9n'),
 u'flaminiacesaremarincola': (u'Flaminia', u'Cesare Marincola'),
 u'tforntoft': (u'T F', u'Orntoft'),
 u'bhekiebrilliancemamba': (u'Bhekie Brilliance', u'Mamba'),
 u'lilliancollins': (u'Lillian', u'Collins'),
 u'mostafamoazzami': (u'Mostafa', u'Moazzami'),
 u'nolusindisoncitakalo': (u'Nolusindiso', u'Ncitakalo'),
 u'gerdag.fillenbaum': (u'Gerda G.', u'Fillenbaum'),
 u'guillaumemarrelec': (u'Guillaume', u'Marrelec'),
 u'efthymiavlachopoulou': (u'Efthymia', u'Vlachopoulou'),
 u'gustavoribeirofernandes': (u'Gustavo Ribeiro', u'Fernandes'),
 u'chrisburtner': (u'Chris', u'Burtner'),
 u'natashacrowcroft': (u'Natasha', u'Crowcroft'),
 u'vanessadidelez': (u'Vanessa', u'Didelez'),
 u'winonacbarker': (u'Winona C', u'Barker'),
 u'lishi': (u'Li', u'Shi'),
 u'laylamich\xe1n': (u'Layla', u'Mich\xe1n'),
 u'suhailamohdsauid': (u'Suhaila', u'Mohd Sauid'),
 u'rachellek.gould': (u'Rachelle K.', u'Gould'),
 u'fangboxia': (u'Fangbo', u'Xia'),
 u'christophechassard': (u'Christophe', u'Chassard'),
 u'shidancheng': (u'Shi Dan', u'Cheng'),
 u'kyasuda': (u'K', u'Yasuda'),
 u'linjunhong': (u'Linjun', u'Hong'),
 u'amitpathak': (u'Amit', u'Pathak'),
 u'r.schwan': (u'R.', u'Schwan'),
 u'mar\xedaj.ortiz': (u'Mar\xeda J.', u'Ortiz'),
 u'nayumishigihara': (u'Nayumi', u'Shigihara'),
 u'ralphacasale': (u'Ralph A', u'Casale'),
 u'analuciagarippo': (u'Ana Lucia', u'Garippo'),
 u'christinal.graves': (u'Christina L.', u'Graves'),
 u'mahanteshbnagmoti': (u'Mahantesh B', u'Nagmoti'),
 u'elizabethnoznesky': (u'Elizabeth', u'Noznesky'),
 u'brunoeymard': (u'Bruno', u'Eymard'),
 u'ralphbeneke': (u'Ralph', u'Beneke'),
 u'bobbypckoeleman': (u'Bobby PC', u'Koeleman'),
 u"geraldinem.o'neill": (u'Geraldine M.', u"O'Neill"),
 u'timmychiwingchan': (u'Timmy Chi Wing', u'Chan'),
 u'sojashamizadeh': (u'Soja', u'Shamizadeh'),
 u'nambler': (u'N', u'Ambler'),
 u'wan-yili': (u'Wan-Yi', u'Li'),
 u'andreafern\xe1ndez-vidal': (u'Andrea', u'Fern\xe1ndez-Vidal'),
 u'catherinea.pastorius': (u'Catherine A.', u'Pastorius'),
 u'j\xf6rndunkel': (u'J\xf6rn', u'Dunkel'),
 u's\xf6renturan': (u'S\xf6ren', u'Turan'),
 u'monikarudzinska': (u'Monika', u'Rudzinska'),
 u'hankeheun-johnson': (u'Hanke', u'Heun-Johnson'),
 u'danf.smelter': (u'Dan F.', u'Smelter'),
 u'j.sadon': (u'J.', u'Sadon'),
 u'a.sachinidis': (u'A.', u'Sachinidis'),
 u'mariajesusabanal-silao': (u'Maria Jesusa', u'Banal-Silao'),
 u'tianyiyan': (u'Tianyi', u'Yan'),
 u'ipetersen': (u'I', u'Petersen'),
 u'yacoubkhalaf': (u'Yacoub', u'Khalaf'),
 u'peggyroberts': (u'Peggy', u'Roberts'),
 u'annesturcke': (u'Anne', u'Sturcke'),
 u'hiroyukiyoshitomi': (u'Hiroyuki', u'Yoshitomi'),
 u'pablomorales': (u'Pablo', u'Morales'),
 u'amaasantewatamatey': (u'Ama Asantewa', u'Tamatey'),
 u'mayshawi': (u'May', u'Shawi'),
 u'julienmandon': (u'Julien', u'Mandon'),
 u'danielvonrhein': (u'Daniel', u'von Rhein'),
 u'mazhen': (u'Ma', u'Zhen'),
 u'takuyayamagishi': (u'Takuya', u'Yamagishi'),
 u'sonjasabitzer': (u'Sonja', u'Sabitzer'),
 u'huaweizeng': (u'Huawei', u'Zeng'),
 u'ponthananiyilkumaranbinumon': (u'Ponthananiyil Kumaran', u'Binumon'),
 u'juttaahnert': (u'Jutta', u'Ahnert'),
 u'bkwagner': (u'B K', u'Wagner'),
 u'jacquesflouquet': (u'Jacques', u'Flouquet'),
 u'muhammadz.hashmi': (u'Muhammad Z.', u'Hashmi'),
 u'utlwangbatlang': (u'Utlwang', u'Batlang'),
 u'randallm.story': (u'Randall M.', u'Story'),
 u'zhouzhiyi': (u'Zhou', u'Zhiyi'),
 u'carola.rohl': (u'Carol A.', u'Rohl'),
 u'richardgorlick': (u'Richard', u'Gorlick'),
 u's.fox': (u'S.', u'Fox'),
 u'e.j.m.nieveenvandijkum': (u'E. J. M.', u'Nieveen van Dijkum'),
 u'tomaswfitzgerald': (u'Tomas W', u'Fitzgerald'),
 u'agafarzadehmotlag': (u'A', u'Gafarzadeh Motlag'),
 u'giovannamilano': (u'Giovanna', u'Milano'),
 u'seon-minlee': (u'Seon-Min', u'Lee'),
 u'm\xf3nicaguxens': (u'M\xf3nica', u'Guxens'),
 u'saleshpchandran': (u'Salesh P', u'Chandran'),
 u'yoelr.garcia\ndiaz': (u'Yoel R.', u'Garcia\nDiaz'),
 u'melaniehasler': (u'Melanie', u'Hasler'),
 u'yk\xe4helariutta': (u'Yk\xe4', u'Helariutta'),
 u'kotaroyamazaki': (u'Kotaro', u'Yamazaki'),
 u'stevenedgar': (u'Steven', u'Edgar'),
 u'stevenjoffe': (u'Steven', u'Joffe'),
 u'keithr.kluender': (u'Keith R.', u'Kluender'),
 u'williamjholtz': (u'William J', u'Holtz'),
 u'mohsenmaroufi': (u'Mohsen', u'Maroufi'),
 u'hongxingniu': (u'Hongxing', u'Niu'),
 u'rajashahidashraf': (u'Raja', u'Shahid Ashraf'),
 u's.r.seema': (u'S. R.', u'Seema'),
 u'dariuszdanel': (u'Dariusz', u'Danel'),
 u'adame.green': (u'Adam E.', u'Green'),
 u'christelmjdepooter': (u'Christel MJ', u'De Pooter'),
 u'saral.sawyer': (u'Sara L.', u'Sawyer'),
 u'ebtehalsal-abdullah': (u'Ebtehal S', u'Al-Abdullah'),
 u'mariai.harrell': (u'Maria I.', u'Harrell'),
 u'yi-haowang': (u'Yi-Hao', u'Wang'),
 u'tobyhurd': (u'Toby', u'Hurd'),
 u'karenslindeman': (u'Karen S', u'Lindeman'),
 u'jeffreym.cumming': (u'Jeffrey M.', u'Cumming'),
 u'gaila.shammas': (u'Gail A.', u'Shammas'),
 u'yongyaoxu': (u'YongYao', u'Xu'),
 u'normanj.johnson': (u'Norman J.', u'Johnson'),
 u'mohammadhosseinrazi': (u'Mohammad Hossein', u'Razi'),
 u'whitneya.loring': (u'Whitney A.', u'Loring'),
 u'mgweinborn': (u'M G', u'Weinborn'),
 u'masakimurase': (u'Masaki', u'Murase'),
 u'gautamksahu': (u'Gautam K', u'Sahu'),
 u'st\xe9phanecook': (u'St\xe9phane', u'Cook'),
 u'namitaroy-chowdhury': (u'Namita', u'Roy-Chowdhury'),
 u'magnusjerichardson': (u'Magnus J E', u'Richardson'),
 u'lstechly': (u'L', u'Stechly'),
 u'robertaach': (u'Robert A', u'Ach'),
 u'cristanacallieri': (u'Cristana', u'Callieri'),
 u'doheepark': (u'Do Hee', u'Park'),
 u'danielmraben': (u'Daniel M', u'Raben'),
 u'ichirotabuchi': (u'Ichiro', u'Tabuchi'),
 u'markush.-y.fritz': (u'Markus H.-Y.', u'Fritz'),
 u'mariahelenaneveslobosilva-filha': (u'Maria Helena Neves Lobo',
  u'Silva-Filha'),
 u'danielspeidel': (u'Daniel', u'Speidel'),
 u'anuragdagar': (u'Anurag', u'Dagar'),
 u'marilyne.crisostomo': (u'Marilyn E.', u'Crisostomo'),
 u'mustafagunes': (u'Mustafa', u'Gunes'),
 u'pongsakutaisincharoen': (u'Pongsak', u'Utaisincharoen'),
 u'meryemhassouani': (u'Meryem', u'Hassouani'),
 u'j.s.l.brown': (u'J. S. L.', u'Brown'),
 u'javiervillanueva-meyer': (u'Javier', u'Villanueva-Meyer'),
 u'dominiqueheymann': (u'Dominique', u'Heymann'),
 u'arthurp.chan': (u'Arthur P.', u'Chan'),
 u'spicciotto': (u'S', u'Picciotto'),
 u'binut.kuruvilla': (u'BINU T.', u'KURUVILLA'),
 u'kojijimura': (u'Koji', u'Jimura'),
 u'martijnm.stuiver': (u'Martijn M.', u'Stuiver'),
 u'sallycriss': (u'Sally', u'Criss'),
 u'danielb.hall': (u'Daniel B.', u'Hall'),
 u'nicholass.britt': (u'Nicholas S.', u'Britt'),
 u'shaoleiwang': (u'Shaolei', u'Wang'),
 u'd.ristic': (u'D.', u'Ristic'),
 u'm.dibattista': (u'M.', u'Di Battista'),
 u'jemmelkamp': (u'J', u'Emmelkamp'),
 u'ricardouauy': (u'Ricardo', u'Uauy'),
 u'priscilarossidebatista': (u'Priscila Rossi de', u'Batista'),
 u'hillaryccleveland': (u'Hillary C', u'Cleveland'),
 u'samuelemarro': (u'Samuele', u'Marro'),
 u'ijeomaonwagbo': (u'Ijeoma O', u'Nwagbo'),
 u'chaodongzhu': (u'Chaodong', u'Zhu'),
 u'mahmouddanaee': (u'Mahmoud', u'Danaee'),
 u'trentonrfoster': (u'Trenton R', u'Foster'),
 u'paolacassolino': (u'Paola', u'Cassolino'),
 u'chengyunxu': (u'Chengyun', u'Xu'),
 u'nicoletandonian': (u'Nicole T', u'Andonian'),
 u'n.edwards': (u'N.', u'Edwards'),
 u'elinorjohn': (u'Elinor', u'John'),
 u'douglasg.suntrup': (u'Douglas G.', u'Suntrup'),
 u'romanvilas': (u'Roman', u'Vilas'),
 u'stefanieott': (u'Stefanie', u'Ott'),
 u'amarsrivastava': (u'Amar', u'Srivastava'),
 u'enricogattavecchia': (u'Enrico', u'Gattavecchia'),
 u'celalettincamci': (u'Celalettin', u'Camci'),
 u'davidgrahamehardie': (u'David Grahame', u'Hardie'),
 u'florencenaluyinda-kitabire': (u'Florence', u'Naluyinda-Kitabire'),
 u'chrismwilliam': (u'Chris M', u'William'),
 u'petrjabandziev': (u'Petr', u'Jabandziev'),
 u'alinevedder': (u'Aline', u'Vedder'),
 u'carmencasta\xf1eda': (u'Carmen', u'Casta\xf1eda'),
 u'philipptobiasmeyer': (u'Philipp Tobias', u'Meyer'),
 u'giovanninassa': (u'Giovanni', u'Nassa'),
 u'monical.assun\xe7\xe3o': (u'Monica L.', u'Assun\xe7\xe3o'),
 u'ji-yuhlee': (u'Ji-Yuh', u'Lee'),
 u'a.reina': (u'A.', u'Reina'),
 u'taijirodoi': (u'Taijiro', u'Doi'),
 u'paulchihmingchihlau': (u'Paul Chih Ming Chih', u'Lau'),
 u'davidg.kaufman': (u'David G.', u'Kaufman'),
 u'rociolopez': (u'Rocio', u'Lopez'),
 u'elisabethm.lodder': (u'Elisabeth M.', u'Lodder'),
 u'martinjshipley': (u'Martin J', u'Shipley'),
 u'marinelevittas': (u'Marine', u'Levittas'),
 u'nathaliehasler-nguyen': (u'Nathalie', u'Hasler-Nguyen'),
 u'tammiel.benzinger': (u'Tammie L.', u'Benzinger'),
 u'denisej.montell': (u'Denise J.', u'Montell'),
 u'johnjsnyder': (u'John J', u'Snyder'),
 u'giovannipasseri': (u'Giovanni', u'Passeri'),
 u'ehonkaniemi': (u'E', u'Honkaniemi'),
 u'gengjia': (u'Geng', u'Jia'),
 u'markg.erlander': (u'Mark G.', u'Erlander'),
 u'tizianagiuva': (u'Tiziana', u'Giuva'),
 u'hee\u2010jinkim': (u'Hee\u2010Jin', u'Kim'),
 u'ignacioobando': (u'Ignacio', u'Obando'),
 u'davidldeitcher': (u'David L', u'Deitcher'),
 u'alexanderh.bentley': (u'Alexander H.', u'Bentley'),
 u'atsukohirota': (u'Atsuko', u'Hirota'),
 u'jenniferjsmith': (u'Jennifer J', u'Smith'),
 u'sanamverma': (u'Sanam', u'Verma'),
 u'\u502b\u535a\u5927\u85cf': (u'\u502b\u535a', u'\u5927\u85cf'),
 u'bryanhickey': (u'Bryan', u'Hickey'),
 u'nathalieescaravage': (u'Nathalie', u'Escaravage'),
 u'stephaniea.kofsky-wofford': (u'Stephanie A.', u'Kofsky-Wofford'),
 u'juliem.huynh': (u'Julie M.', u'Huynh'),
 u'wenjuanlv': (u'Wenjuan', u'Lv'),
 u'wenjuanli': (u'Wenjuan', u'Li'),
 u'josephbelagyi': (u'Joseph', u'Belagyi'),
 u'bnsavani': (u'B N', u'Savani'),
 u'jonnaukkola': (u'Jonna', u'Ukkola'),
 u'jmintz': (u'J', u'Mintz'),
 u'tadayoshitakeuchi': (u'Tadayoshi', u'TAKEUCHI'),
 u'samuelf.berkovic': (u'Samuel F.', u'Berkovic'),
 u'm.alikhasi': (u'M.', u'Alikhasi'),
 u'ashamsshahemabadi': (u'A', u'Shams Shahemabadi'),
 u'wenluo': (u'Wen', u'Luo'),
 u'dougjackson': (u'Doug', u'Jackson'),
 u'srujankumargandham': (u'Srujan Kumar', u'Gandham'),
 u'donatellalanari': (u'Donatella', u'Lanari'),
 u'bertoltgust': (u'Bertolt', u'Gust'),
 u'anav.cruz': (u'Ana V.', u'Cruz'),
 u'jonathonmwillatt': (u'Jonathon M', u'Willatt'),
 u'jacquelinelpadilla-gami\xf1o': (u'Jacqueline L', u'Padilla-Gami\xf1o'),
 u'fankemeng': (u'Fanke', u'Meng'),
 u'shu-yuguo': (u'Shu-Yu', u'Guo'),
 u'rachidasoulaymani-bencheikh': (u'Rachida', u'Soulaymani-Bencheikh'),
 u'thomaszambelis': (u'Thomas', u'Zambelis'),
 u'christiandagossou': (u'Christian D', u'Agossou'),
 u'w.jimzheng': (u'W. Jim', u'Zheng'),
 u'alexanderyu.nikitin': (u'Alexander Yu.', u'Nikitin'),
 u'emmaj.dunn': (u'Emma J.', u'Dunn'),
 u'alans.l.yu': (u'Alan S. L.', u'Yu'),
 u'dhirajoshi': (u'Dhira', u'Joshi'),
 u'micha\xeblhemmer': (u'Micha\xebl', u'Hemmer'),
 u'karenlaustin': (u'Karen L', u'Austin'),
 u'xiao-suwang': (u'Xiao-Su', u'Wang'),
 u'konradzareba': (u'Konrad', u'Zareba'),
 u'bochaocheng': (u'Bochao', u'Cheng'),
 u'josephcolao': (u'Joseph', u'Colao'),
 u'balasubramanianvasanthakumar': (u'Balasubramanian', u'Vasanthakumar'),
 u'annaderosa': (u'Anna', u'De Rosa'),
 u'ayaosaki': (u'Aya', u'Osaki'),
 u'lubicabenuskova': (u'Lubica', u'Benuskova'),
 u'stefank\xf6nemann': (u'Stefan', u'K\xf6nemann'),
 u'd.m.morse': (u'D. M.', u'Morse'),
 u'hughgelabert': (u'Hugh', u'Gelabert'),
 u'dcarey': (u'D', u'Carey'),
 u'shinnwonlim': (u'Shinn Won', u'Lim'),
 u'firasal-niaimi': (u'Firas', u'Al-Niaimi'),
 u'suyoungjeon': (u'Su Young', u'Jeon'),
 u'k.conway': (u'K.', u'Conway'),
 u'toshiharuarishima': (u'Toshiharu', u'Arishima'),
 u'michaels.green': (u'Michael S.', u'Green'),
 u'frederickcdebeer': (u'Frederick C', u'de Beer'),
 u'guidofschauer': (u'Guido F', u'Schauer'),
 u'alexanderminovich': (u'Alexander', u'Minovich'),
 u'davidosterberg': (u'David', u'Osterberg'),
 u'charlesjbanks': (u'Charles J', u'Banks'),
 u'ranjithkumarkankala': (u'Ranjith Kumar', u'Kankala'),
 u'helmutzarbl': (u'Helmut', u'Zarbl'),
 u'ki-junsong': (u'Ki-Jun', u'Song'),
 u'xiangrongchen': (u'Xiangrong', u'Chen'),
 u'veyselkars': (u'Veysel', u'Kars'),
 u'hisayukitono': (u'Hisayuki', u'Tono'),
 u'huizhechen': (u'Huizhe', u'Chen'),
 u'zai-rongzhang': (u'Zai-Rong', u'Zhang'),
 u'bethhalfyard': (u'Beth', u'Halfyard'),
 u'trevorstevenson': (u'Trevor', u'Stevenson'),
 u'zeljkograbarevic': (u'Zeljko', u'Grabarevic'),
 u'enzomanzato': (u'Enzo', u'Manzato'),
 u'qiu-jinyu': (u'Qiu-Jin', u'Yu'),
 u'sylvaindelzon': (u'Sylvain', u'Delzon'),
 u'carolswallow': (u'Carol', u'Swallow'),
 u'krismovig': (u'Kris', u'Movig'),
 u't.nurmikko': (u'T.', u'Nurmikko'),
 u'lisamarendt': (u'Lisa M', u'Arendt'),
 u'lbjorde': (u'LB', u'Jorde'),
 u'arnolda.j.bartels': (u'Arnold A. J.', u'Bartels'),
 u'khushalbrijwani': (u'Khushal', u'Brijwani'),
 u'stanleyealthof': (u'Stanley E', u'Althof'),
 u'wen-shanhuang': (u'Wen-Shan', u'Huang'),
 u'yufanguo': (u'Yufan', u'Guo'),
 u'serenellaserani': (u'Serenella', u'Serani'),
 u'yapingwang': (u'Yaping', u'Wang'),
 u'leahhickey': (u'Leah', u'Hickey'),
 u'chongxu': (u'Chong', u'Xu'),
 u'guillaumeduthoit': (u'Guillaume', u'Duthoit'),
 u'randallf.d\u2019souza': (u'Randall F.', u'D\u2019Souza'),
 u'shigeyoshisoga': (u'Shigeyoshi', u'Soga'),
 u'saskiavanliempt': (u'Saskia', u'van Liempt'),
 u'sunilronad': (u'Sunil', u'Ronad'),
 u'kathrinecfernandez': (u'Kathrine C', u'Fernandez'),
 u'suelileikotakamatsugoyat\xe1': (u'Sueli Leiko Takamatsu', u'Goyat\xe1'),
 u'kevinm.dougherty': (u'Kevin M.', u'Dougherty'),
 ...}

In [7]:
author_vertex_dict[u'kennethfmanly']
author_vertex_dict[u'nancybaker']


Out[7]:
1870875

In [71]:
import graph_tool.topology as topology

In [73]:
vlist, elist = topology.shortest_path(g, g.vertex(author_vertex_dict[u'kennethfmanly']),
                                      g.vertex(author_vertex_dict[u'zhihuitong']))

In [ ]:
#topology.absolute_import

In [72]:
#topology.all_shortest_paths(g, g.vertex(author_vertex_dict[u'kennethfmanly']),
                                      g.vertex(author_vertex_dict[u'zhihuitong']))


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-72-4844329df113> in <module>()
----> 1 topology.all_shortest_paths(g, g.vertex(author_vertex_dict[u'kennethfmanly']),
      2                                       g.vertex(author_vertex_dict[u'zhihuitong']))

AttributeError: 'module' object has no attribute 'all_shortest_paths'

In [36]:
for edge in elist: 
    test= edge

In [46]:
int(test.target())


Out[46]:
327

In [91]:
def addedge(graphobject, source, dest, vertexdict, v_label):
    if source not in vertexdict:
        v = graphobject.add_vertex()
        vertexdict[source] = int(v)
        v_label[v] = str(rev_pmid_vertex_dict[dest])
    if dest not in vertexdict:
        v = graphobject.add_vertex()
        vertexdict[dest] = int(v)
        v_label[v] = str(rev_pmid_vertex_dict[dest])
    graphobject.add_edge(vertexdict[source], vertexdict[dest])
    return graphobject, vertexdict, v_label
       
import graph_tool as gt
                
def buildauthorgraph(nodelist, edgelist, authordict, mastergraph):
    _g =gt.Graph(directed = False)
    _vertexdict = dict()
    v_label = _g.new_vertex_property("string")
    #first add nodes
    for node in nodelist:
        v = _g.add_vertex()
        _vertexdict[node] = int(v)
        v_label[v] = str(author_full_name_dict[authordict[node]]).strip('()').strip("'").replace("u'", "").replace("',", "")
    #now add edges
    for edge in edgelist:
        _g.add_edge(_vertexdict[int(edge.source())], _vertexdict[int(edge.target())])
    return _g, _vertexdict, v_label

In [92]:
authg, authdict, authlab = buildauthorgraph(vlist, elist, rev_author_vertex_dict, g)

In [90]:
str(author_full_name_dict[rev_author_vertex_dict[11]]).strip('()').strip("'").replace("u'", "").replace("',", "")


Out[90]:
'Shinichi Miyake'

In [145]:
#see formatting at https://graph-tool.skewed.de/static/doc/draw.html
import graph_tool.all as gt
deg = authg.degree_property_map("out") #out AND in how?
gt.graph_draw(authg, vertex_fill_color=deg, vertex_text=authlab, output_size=(600,300), 
              vertex_text_position=3.14/4., vertex_size = 20, vertex_font_size = 20,
              edge_pen_width = 6, inline=True)


Out[145]:
<PropertyMap object with key type 'Vertex' and value type 'vector<double>', for Graph 0x16e99be10, at 0x10c2789d0>

In [146]:
#all_shortest_paths not working on ubuntu either - may need debian
#http://main-discussion-list-for-the-graph-tool-project.982480.n3.nabble.com/Debian-package-and-boost-at-compile-time-td4026383i20.html
#https://graph-tool.skewed.de/static/doc/search_module.html#graph_tool.search.dijkstra_search

In [1]:
%reset -f
import cPickle as pickle
g = pickle.load(open("authors_full_graph.p", "rb"))
author_vertex_dict = pickle.load(open("authors_vertex_dict.p", "rb"))
author_full_name_dict = pickle.load(open("authors_full_name_dict.p", "rb"))
rev_author_vertex_dict = {v: k for k, v in author_vertex_dict.items()}

In [ ]: